#' @title Restructuring of length frequency data
#'
#' @description First step of the Electronic LEngth Frequency ANalysis (ELEFAN),
#' which is restructuring length-frequency data (lfq).
#' This is done according to a certain protocol, described by many authors (see
#' Details or References for more information).
#'
#' @param param a list consisting of following parameters:
#' \itemize{
#' \item \strong{midLengths} midpoints of the length classes
#' \item \strong{dates} dates of sampling times (class Date)
#' \item \strong{catch} matrix with catches/counts per length class (row) and
#' sampling date (column)
#' }
#' @param MA number indicating over how many length classes the moving average
#' should be performed (default: 5)
#' @param addl.sqrt additional squareroot transformation of positive values
#' according to Brey et al. (1988) (default: FALSE).
#' Particularly useful if many observations have a low frequency (<10)
#'
#' @examples
#' # data and plot of catch frequencies
#' data(synLFQ4)
#' plot(synLFQ4, Fname="catch")
#'
#' # restructuring and calculation of ASP
#' synLFQ4 <- lfqRestructure(synLFQ4, MA=11)
#' synLFQ4$ASP
#'
#' # plot of restructured scores and fit of soVBGF growth curves
#' plot(synLFQ4)
#' lfqFitCurves(synLFQ4,
#' par=list(Linf=80, K=0.5, t_anchor=0.25, C=0.75, ts=0),
#' draw=TRUE
#' )$fASP
#'
#'
#' @details This function is used prior to fitting of growth curves (e.g. in
#' \code{\link{ELEFAN}}, \code{\link{ELEFAN_SA}} functions). It restructures a length
#' frequency data set according to a list of steps to emphasise cohorts in the data.
#' The steps can be found in various publications, see e.g. Brey et al. (1988) or
#' Pauly and David (1981). Here, the most recent steps documented in Gayanilo (1997)
#' are followed.
#'
#' @return A list with the input parameters and following list objects:
#' \itemize{
#' \item \strong{rcounts}: restructured frequencies,
#' \item \strong{peaks_mat}: matrix with uniquely numbered positive peaks,
#' \item \strong{ASP}: available sum of peaks, sum of posititve peaks which
#' could be potential be hit by growth curves. This is calculated as the sum of
#' maximum values from each run of posive restructured scores,
#' \item \strong{MA}: moving average used for restructuring.
#' }
#'
#'
#' @references
#' Brey, T., Soriano, M., and Pauly, D. 1988. Electronic length frequency analysis:
#' a revised and expanded user's guide to ELEFAN 0, 1 and 2.
#'
#' Gayanilo, Felimon C. FAO-ICLARM stock assessment tools: reference manual.
#' No. 8. Food & Agriculture Org., 1997.
#'
#' Pauly, D. 1981. The relationship between gill surface area and growth performance in fish:
#' a generalization of von Bertalanffy's theory of growth. \emph{Meeresforsch}. 28:205-211
#'
#' Pauly, D. and N. David, 1981. ELEFAN I, a BASIC program for the objective extraction of
#' growth parameters from length-frequency data. \emph{Meeresforschung}, 28(4):205-211
#'
#' Pauly, D., 1985. On improving operation and use of ELEFAN programs. Part I: Avoiding
#' "drift" of K towards low values. \emph{ICLARM Conf. Proc.}, 13-14
#'
#' Pauly, D., 1987. A review of the ELEFAN system for analysis of length-frequency data in
#' fish and aquatic invertebrates. \emph{ICLARM Conf. Proc.}, (13):7-34
#'
#' Pauly, D. and G. R. Morgan (Eds.), 1987. Length-based methods in fisheries research.
#' (No. 13). WorldFish
#'
#' Pauly, D. and G. Gaschuetz. 1979. A simple method for fitting oscillating length
#' growth data, with a program for pocket calculators. I.C.E.S. CM 1979/6:24.
#' Demersal Fish Cttee, 26 p.
#'
#' Pauly, D. 1984. Fish population dynamics in tropical waters: a manual for use
#' with programmable calculators (Vol. 8). WorldFish.
#'
#' Quenouille, M. H., 1956. Notes on bias in estimation. \emph{Biometrika}, 43:353-360
#'
#' Somers, I. F., 1988. On a seasonally oscillating growth function.
#' ICLARM Fishbyte 6(1): 8-11.
#'
#' Sparre, P., Venema, S.C., 1998. Introduction to tropical fish stock assessment.
#' Part 1. Manual. \emph{FAO Fisheries Technical Paper}, (306.1, Rev. 2): 407 p.
#'
#' Tukey, J., 1958. Bias and confidence in not quite large samples.
#' \emph{Annals of Mathematical Statistics}, 29: 614
#'
#' Tukey, J., 1986. The future of processes of data analysis. In L. V. Jones (Eds.),
#' The Collected Works of John W. Tukey-philosophy and principles of data analysis:
#' 1965-1986 (Vol. 4, pp. 517-549). Monterey, CA, USA: Wadsworth & Brooks/Cole
#'
#' @export
lfqRestructure <- function(param, MA=5, addl.sqrt=FALSE){
lfq <- param
# replace NAs in catch
lfq$catch[which(is.na(lfq$catch))] <- 0
if(MA%%2 == 0) stop("MA must be an odd integer")
# Steps refer to Gayanilo (1997) FAO-ICLARM stock assessment tools: reference manual
rcounts <- 0*lfq$catch
for(i in seq(ncol(lfq$catch))){
pm <- (MA-1)/2 # plus minus
# positions of first and last non-zero valules
if(all(lfq$catch[,i] == 0)) stop(paste0("Sample ", i, " (Date: ",lfq$dates[i],") contains only zeros! Please remove this sample and run again!"))
val_first <- min(which(lfq$catch[,i] != 0))
val_last <- max(which(lfq$catch[,i] != 0))
val_pos <- seq(val_first, val_last)
val_string <- lfq$catch[val_pos,i]
# number of values
n <- length(val_string)
AF <- NaN*val_string
nz <- NaN*val_string
if(n > 1){
temp <- seq(val_string)
}else{
temp <- 1
}
## Steps A & B - Computation of the moving average
for(j in temp){
idx <- (j-pm):(j+pm)
idx <- idx[which(idx %in% temp)]
idxn <- idx[-which(idx==j)] # neighbors only
nz[j] <- sum(val_string[idxn] == 0) + (MA-length(idx)) # number of adjacent zeros
MA.j <- sum(val_string[idx])/MA
AF[j] <- val_string[j]/MA.j
}
# intermediate step to remove Inf or NA
AF <- replace(AF, which(AF==Inf | is.na(AF)), 0)
# Calculate mean quotient
mprime <- mean(AF, na.rm=TRUE)
## Step C Divide by mean quotient and subtract 1.0
Fs <- AF / mprime - 1 # restructured frequencies
## Steps D & E - Identify isolated peaks; Adjust for zero frequency
posFs <- which(Fs > 0)
if(length(posFs)>0) {Fs[posFs] <- (Fs * 0.5^nz)[posFs]}
# replace ultimate length bin with zero if negative
if(sign(Fs[length(Fs)]) == -1){Fs[length(Fs)] <- 0}
# divide penultimate length bin by 2 if negative
if(length(sign(Fs[length(Fs)-1])) > 0 &&
sign(Fs[length(Fs)-1]) == -1){Fs[length(Fs)-1] <- Fs[length(Fs)-1]*0.5}
## Step F - Adjust for Fi
SPV <- sum(Fs[which(Fs > 0)]) # sum of positive values
SNV <- sum(Fs[which(Fs < 0)]) # sum of negative values
# set -1 to 0
minus1 <- which((1+Fs) < 1e-8 | is.na(Fs))
if(length(minus1)>0) {Fs[minus1] <- 0}
# adjust negative numbers
isneg <- which(Fs < 0)
Fs[isneg] <- Fs[isneg] * (SPV/-SNV)
# optional square-root adjustment to emphasize larger length bins with lower counts
if(addl.sqrt){
posFs <- which(Fs > 0)
if(length(posFs)>0) {Fs[posFs] <- Fs[posFs] / sqrt(1+2/lfq$catch[posFs,i])} #Fs[posFs] / sqrt(1+2/Fs[posFs])}
}
rcounts[val_pos,i] <- Fs
}
lfq$rcounts <- rcounts
# create peak matrix
prep_mat <- lfq$rcounts
prep_mat <- ifelse(prep_mat > 0,1,0)
peaks_mat <- NA*prep_mat
for(i in seq(ncol(peaks_mat))){
vec_peaki <- prep_mat[,i]
runs <- rle(vec_peaki)
rle_val <- runs$values
rle_val[which(rle_val == 1)] <- 1:length(rle_val[which(rle_val == 1)])
peaks_mat[,i] <- rep(rle_val, runs$lengths)
}
maxn.peaks <- max(peaks_mat, na.rm=TRUE)
peaks_mat <- peaks_mat + (prep_mat * maxn.peaks * col(peaks_mat))
lfq$peaks_mat <- peaks_mat
# ASP calc
sampASP <- NaN*seq(ncol(rcounts))
for(i in seq(ncol(rcounts))){
## lfq.i <- lfq[i,]
tmp <- rle(sign(rcounts[,i]))
start.idx <- c(1, cumsum(tmp$lengths[-length(tmp$lengths)])+1)
end.idx <- cumsum(tmp$lengths)
posrun <- which(tmp$values == 1)
peakval <- NaN*posrun
if(length(posrun) > 0){
for(p in seq(length(posrun))){
peakval[p] <- max(rcounts[start.idx[posrun[p]]:end.idx[posrun[p]], i ])
}
sampASP[i] <- sum(peakval)
}else{
sampASP[i] <- 0
}
}
ASP <- sum(sampASP)
lfq$ASP <- ASP
lfq$MA <- MA
class(lfq) <- "lfq"
return(lfq)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.