R/Bhattacharya.R
In TropFishR: Tropical Fisheries Analysis

Documented in Bhattacharya

#' @title Bhattacharya's method
#'
#' @description Find relative frequencies and frequency distribution of distinct cohorts
#'      in the observed length frequency distribution by resolving it into
#'      Gaussian components.
#'
#' @param param a list consisting of following parameters:
#' \itemize{
#'   \item \code{midLengths} midpoints of the length class as vector,
#'   \item \code{catch} a vector with the catch per length class or a matrix with
#'      catches per length class of subsequent years;
#' }
#' @param n_rnorm number of observations for the function \code{\link{rnorm}}. The
#'    default is 1000.
#' @param savePlots logical; indicating whether the analyis graphs should be recorded
#'
#' @keywords function Bhattacharya length-frequency lfq
#'
#' @examples
#' # The following example requires to choose certain values for the regression analyses:
#' # first cohort:   point  2 to  8
#' # second cohort:  point 12 to 17
#' # third cohort:   point 19 to 23
#' # fourth cohort:  point 26 to 30
#'
#' \donttest{
#'  data(synLFQ1)
#'  Bhattacharya(param = synLFQ1)
#' }
#'
#' @details This method includes the \code{\link{identify}} function, which allows to
#'      choose points on a graphical device manually. To stop this process please press
#'      right mouse click on the graph, and in case you are working on a windows
#'      maschine click on "Stop". An error will be caused if the graphical device
#'      is closed manually. After you have selected the points for regression analysis
#'      you will be asked if you want to redo the selection or if you want to continue.
#'      Please enter in the Console "y" for continuation if you are satisfied
#'      with your selection and the corresponding Gaussian distribution or enter
#'      "redo" if you want to repeat the selection procedure. This function
#'      allows a maximum of 12 cohorts or seperate distributions in one sample. Please
#'      find more details in the Vignette of this package or in the FAO manual by
#'      Sparre and Venema (1998) (\strong{Section 3.4.1, p. 80}).
#'
#' @return A list with the input parameters and
#'    \itemize{
#'    \item \strong{regressionLines} dataframe with intercept, slope, start and end points
#'       of the regression lines,
#'    \item \strong{Lmean_SD_list} dataframe with the mean length (Lmean), standard deviation (SD),
#'      and seperation index (SI) for each cohort,
#'    \item \strong{bhat_results} dataframe with the results of the Bhattacharya method,
#'    \item \strong{distributions} list with the x and y values of selected distributions,
#'    \item \strong{cohort_plots} list with analysis plots (when savePlots = TRUE).
#'    }
#'
#' @importFrom grDevices dev.new dev.off recordPlot
#' @importFrom graphics abline hist identify layout lines par plot points text title
#' @importFrom stats dnorm lm rnorm sd
#' @importFrom utils flush.console
#'
#' @references
#' Bhattacharya, C.G., 1967. A simple method of resolution of a distribution into Gaussian
#' components, \emph{Biometrics}, 23:115-135
#'
#' Sparre, P., Venema, S.C., 1998. Introduction to tropical fish stock assessment.
#' Part 1. Manual. \emph{FAO Fisheries Technical Paper}, (306.1, Rev. 2). 407 p.
#'
#' @export


Bhattacharya <- function(param, n_rnorm = 1000, savePlots = FALSE){

    if(interactive()){

  res <- param
  if("midLengths" %in% names(res) == TRUE){
    midLengths <- as.character(res$midLengths)
    # create column without plus group (sign) if present
    midLengths.num <- do.call(rbind,strsplit(midLengths, split="\\+"))
    midLengths.num <- as.numeric(midLengths.num[,1])
  }else stop(
    noquote('The parameter list does not contain an object with name "midLengths"'))
  if("catch" %in% names(res) == TRUE){
    catch <- res$catch
  }else stop(
    noquote('The parameter list does not contain an object with name "catch"'))

  #Transform matrix into vector if provided
  if(inherits(catch,'matrix')){
    catch.vec <- rowSums(catch, na.rm = TRUE)
  }else catch.vec <- catch
  if(length(midLengths.num) != length(catch.vec)) stop(
    noquote("midLengths and catch do not have the same length!"))

  #calculate size class interval
  interval <- midLengths.num[2] - midLengths.num[1]

  #STEP 1: fills second column of bhat with counts per size class
  bhat.table <- data.frame('mean.length.classes' = midLengths.num,
                           'N1.plus' = catch.vec,
                           'log.N1.plus' = NA,
                           'delta.log.N1.plus' = NA,
                           'L' = NA,
                           'delta.log.N' = NA,
                           'log.N1' = NA,
                           'N1' = NA,
                           'N2.plus' = NA)

  bhat.table.list <- vector("list", 13)
  a.b.list <- vector("list", 12)
  l.s.list <- vector("list", 12)
  temp.list <- vector("list", 12)
  coho_plot <- vector("list",12)


  colour.xy <- c('blue','darkgreen','red','goldenrod2','purple','orange',
                 'lightgreen','skyblue','brown','darkblue','darkorange','darkred')
  colour.vec <- rep('black', length(bhat.table$L))

  writeLines("Starting on the left, please choose from black points which \nlie on a straight line! Do not include points which might be \naffected by the next distribution! \nTo stop the process press right click (and choose \n'Stop' if necessary)")
  flush.console()

  for(xy in 1:12){

    #STEP 2: fills third column
    bhat.table$log.N1.plus <- round(log(bhat.table$N1.plus),digits=3)

    #STEP 3: fills fourth coulmn
    if(xy > 1) bhat.table$log.N1.plus[last_choices[2]] <- 0
    for(i in 2:length(bhat.table$log.N1.plus)){
      delta.value <- bhat.table$log.N1.plus[i] - bhat.table$log.N1.plus[i-1]
      bhat.table$delta.log.N1.plus[i] <- delta.value
    }

    #STEP 4: fills fifth coulmn
    checki <- !is.na(bhat.table$delta.log.N1.plus) &
      bhat.table$delta.log.N1.plus != 'Inf'
    bhat.table$L[checki] <- bhat.table$mean.length.classes[checki] - (interval/2)

    #STEP 5: plot of fifth against fourth column
    checki2 <- !is.na(bhat.table$delta.log.N1.plus) &
      bhat.table$delta.log.N1.plus != 'Inf'
    bhat.table$L[checki2] <- (bhat.table$mean.length.classes[checki2] - (interval/2))

    #STEP 6: select points for regression line
    if(xy == 1){bhat.table.list[[1]] <- bhat.table}

    repeat {

      if("id.co1" %in% ls()){
        if(length(id.co1$ind) == 0) break
      }

      repeat {
        # histogramm
        dev.new()#noRStudioGD = TRUE)
        par(mfrow = c(2,1),
            oma = c(6.5, 0.5, 2, 1) + 0.1,
            mar = c(0, 4, 0.5, 0) + 0.1)
        layout(matrix(c(1,2),nrow=2), heights = c(1,2.5))

        freqis <- rep(bhat.table.list[[1]]$mean.length.classes, bhat.table.list[[1]]$N1.plus)
        if(xy == 1){
          hist(freqis, breaks = 50, main = '', xaxt = 'n', probability = TRUE)
          mtext(side = 3, "Click on two points. Escape to Quit.",
                      xpd = NA, cex = 1.25)
        }
        if(xy > 1){
          hist(freqis, breaks = 50, main = '', xaxt = 'n',
                        probability = TRUE, ylim = c(0,maxlim))
          mtext(side = 3, "Click on two points. Escape to Quit.",
                      xpd = NA, cex = 1.25)
        }
        if(xy > 1){
          for(tempRep in 1:(xy-1)){
            lines(temp.list[[tempRep]]$xfit, temp.list[[tempRep]]$yyfit,
                  col=colour.xy[tempRep], lwd=1.5)
          }
        }
        # bhatta plot
        if(xy == 1){
          plot(bhat.table.list[[1]]$L, bhat.table.list[[1]]$delta.log.N1.plus,
               col = colour.vec, pch = 16,
               ylab = expression(paste(delta," log N+")))
          abline(h = 0)
          text(bhat.table.list[[1]]$L, bhat.table.list[[1]]$delta.log.N1.plus,
               labels = row.names(bhat.table.list[[1]]), cex= 0.7, pos=3)
          title(xlab = "L", outer = TRUE, line = 2.5)
        }
        if(xy > 1){
          plot(bhat.table.list[[1]]$L, bhat.table.list[[1]]$delta.log.N1.plus, pch = 4,
               col = colour.vec,
               ylab = expression(paste(delta," log N+")))
          abline(h = 0)
          abline(a = a.b.list[[xy-1]][1], b=a.b.list[[xy-1]][2],
                 col=colour.xy[xy-1], lwd = 1.5)
          points(bhat.table$L[(last_choices[2]+1):length(bhat.table$L)],
                 bhat.table$delta.log.N1.plus[(last_choices[2]+1):length(bhat.table$L)],
                 col = 'black', pch = 16)
          text(bhat.table$L[(last_choices[2]+1):length(bhat.table$L)],
               bhat.table$delta.log.N1.plus[(last_choices[2]+1):length(bhat.table$L)],
               labels=row.names(bhat.table.list[[1]][
                 (last_choices[2]+1):length(bhat.table$delta.log.N1.plus),]),
               cex = 0.7, pos = 3)
          title(xlab = "L", outer = TRUE, line = 2.5)
        }
        # identify regression line
        if(xy > 1) rm(id.co1)
        id.co1 <- identify(bhat.table$L, bhat.table$delta.log.N1.plus, n = 2, pos = TRUE)
        if(length(id.co1$ind) == 0) break

        colour.vec[id.co1$ind[1]:id.co1$ind[2]] <- colour.xy[xy]
        dev.off()

        #STEP 7: calculate mean length and standard deviation of regression line
        x.co1 <- bhat.table$L[id.co1$ind[1]:id.co1$ind[2]]

        # Break loop if selection does not embrace at least 3 points
        if(length(x.co1) < 3){
          writeLines("Your selection is not possible. You have to choose two \npoints which include at least one other point. At least \nthree points are required for a regression line. Please choose again!")
          flush.console()
        }
        if(length(x.co1) >= 3){
          break
        }
      }

      if(xy == 1 & length(id.co1$ind) == 0){
        writeLines("You did not choose any points! Please run the function again \nand choose points to include in the calculation of the \nGaussian components of the observed frequency.")
        flush.console()
        cancel <- TRUE
        break
      }
      if(xy > 1 & length(id.co1$ind) == 0){
        cancel <- FALSE
        break
      }

      y.co1 <- bhat.table$delta.log.N1.plus[id.co1$ind[1]:id.co1$ind[2]]
      m.co1 <- lm(y.co1 ~ x.co1)
      sum.m.co1 <- summary(m.co1)
      a.co1 <- sum.m.co1$coefficients[1]
      b.co1 <- sum.m.co1$coefficients[2]
        l.mean.co1 <- -a.co1/b.co1        ## mean length: L(mean)(N1) = -a/b p.83
        s.co1 <- sqrt(-interval/b.co1)    ## standard deviation: s(N1) = sqrt(-dL/b) p.83

      a.b.list[[xy]] <- c(a.co1,b.co1)
      l.s.list[[xy]] <- c(l.mean.co1,s.co1)


      #STEP 8: fill sixth column
      #set.seed ????
      normal.dis.co1 <- rnorm(n=n_rnorm, mean = l.mean.co1, sd=s.co1)
      max.class.ind.co1 <- which(round(max(normal.dis.co1), digits = 0) >= (
        bhat.table$mean.length.classes - (interval/2)) &
          round(max(normal.dis.co1),digits = 0) < (
            bhat.table$mean.length.classes + (interval/2)))
      if(length(max.class.ind.co1) == 0) max.class.ind.co1 <- length(bhat.table$mean.length.classes)
      max.class.co1 <- bhat.table$mean.length.classes[max.class.ind.co1]
      for(i in 1:max.class.ind.co1){
        delta.N <- round(a.co1 + b.co1 * bhat.table$L[i],digits=3)
        bhat.table$delta.log.N[i] <- delta.N
      }


      # Draw plot for checking
      if(xy == 1) min.class.ind.co1 <- 1
      if(xy > 1){
        min.class.ind.co1 <- which(round(min(normal.dis.co1), digits = 0) >= (
          bhat.table$mean.length.classes - (interval/2)) &
            round(min(normal.dis.co1),digits = 0) < (
              bhat.table$mean.length.classes + (interval/2)))
        if(length(min.class.ind.co1) == 0) min.class.ind.co1 <- 1
      }
      min.class.co1 <- bhat.table$mean.length.classes[min.class.ind.co1]

      xfit <- seq(min.class.co1, max.class.co1, length = 100)
      yfit <- dnorm(xfit, mean=l.mean.co1, sd=s.co1)

      freqis <- rep(bhat.table.list[[1]]$mean.length.classes,
                    bhat.table.list[[1]]$N1.plus)
      h <- hist(freqis, breaks = 50, plot = FALSE)

      dev.new()#noRStudioGD = TRUE)
      par(mfrow = c(2,1),
          oma = c(6.5, 0.5, 2, 1) + 0.1,
          mar = c(0, 4, 0.5, 0) + 0.1)
      layout(matrix(c(1,2),nrow=2), heights = c(1,2.5))
      # histogram
      yyfit <-  yfit * abs((max(yfit) - max(h$density)) / max(yfit))
      if(xy == 1) maxlim <- ifelse(max(yyfit) > max(h$density), max(yyfit), max(h$density))
      hist(freqis, breaks = 50, main = "", xaxt = 'n',
           probability = TRUE, ylim = c(0,maxlim))
      if(xy > 1){
        for(tempRep in 1:(xy-1)){
          lines(temp.list[[tempRep]]$xfit, temp.list[[tempRep]]$yyfit,
                col=colour.xy[tempRep], lwd=1.5)
        }
      }
      lines(xfit, yyfit, col=colour.xy[xy], lwd=1.5)
      temp.list[[xy]] <- list(xfit = xfit,
                              yyfit = yyfit)
      # bhatta plot
      plot(bhat.table.list[[1]]$L, bhat.table.list[[1]]$delta.log.N1.plus, pch = 4,
           col = colour.vec,
           ylab = expression(paste(delta," log N+")))
      abline(h = 0)
      abline(a = a.co1, b=b.co1, col=colour.xy[xy], lwd = 1.5)
      points(bhat.table$L[(id.co1$ind[2]+1):length(bhat.table$L)],
             bhat.table$delta.log.N1.plus[(id.co1$ind[2]+1):length(bhat.table$L)],
             col = 'black', pch = 16)
      text(bhat.table$L[(id.co1$ind[1]):(id.co1$ind[2])],
           bhat.table$delta.log.N1.plus[(id.co1$ind[1]):(id.co1$ind[2])],
           labels=row.names(bhat.table.list[[1]][(id.co1$ind[1]):
             (id.co1$ind[2]),]),
           cex= 0.7, pos=3)
      points(bhat.table$L[(id.co1$ind[1]):(id.co1$ind[2])],
             bhat.table$delta.log.N1.plus[(id.co1$ind[1]):(id.co1$ind[2])],
             col = colour.xy[xy], pch = 16)
      text(bhat.table$L[(id.co1$ind[2]+1):length(bhat.table$L)],
           bhat.table$delta.log.N1.plus[(id.co1$ind[2]+1):length(bhat.table$L)],
           labels=row.names(bhat.table.list[[1]][
             (id.co1$ind[2]+1):length(bhat.table$delta.log.N1.plus),]),
           cex= 0.7, pos=3)
      title(xlab = "L", outer = TRUE, line = 2.5)

      if(savePlots == TRUE) coho_plot[[xy]] <- recordPlot() ##coho_plot[xy] <- quartz.save(file = paste("Cohort",xy,"plot", sep='_'),type = "jpeg")

      repSel <- readline(prompt = writeLines("Are you satisfied with your selection and want to continue? \n'y' or 'redo':"))

      if(repSel == '') repSel <- readline(prompt = writeLines("Please type 'y' or 'redo' into the console! \nAre you satisfied with your selection and want to continue? \n'y' or 'redo':"))

      dev.off()

      if(repSel == 'y') break
    }

    #STEP 9: fills one value in seventh column and one in eigth column, get clean starting value
    if(length(id.co1$ind) == 0) break
    choosen <- seq(id.co1$ind[1],id.co1$ind[2],interval)
    # if odd, take midpoint:
    if(length(choosen) %% 2 == 1){
      mid_choice <- id.co1$ind[1] + (id.co1$ind[2]-id.co1$ind[1])/2
    }else{
      # if even, take one after midpoint:
      mid_choice <- (id.co1$ind[1] + (id.co1$ind[2]-id.co1$ind[1])/2) + (interval/2)
    }
    bhat.table$log.N1[mid_choice] <- log(bhat.table$N1.plus[mid_choice])
    bhat.table$N1[mid_choice] <- bhat.table$N1.plus[mid_choice]


    #STEP 10: fills rest of seventh column
    for(i in (mid_choice+1):length(bhat.table$delta.log.N)){
      log.N1 <- bhat.table$log.N1[i-1] + bhat.table$delta.log.N[i]
      bhat.table$log.N1[i] <- log.N1
    }

    #STEP 11: fills rest of eigth column
    for(i in (mid_choice+1):length(bhat.table$delta.log.N)){
      N1i <- round(exp(bhat.table$log.N1[i]),digits=2)
      bhat.table$N1[i] <- N1i
    }

    #STEP 12: fills ninth column
    bhat.table$N1[which(which(is.na(bhat.table$N1)) <
                          max.class.ind.co1)] <- bhat.table$N1.plus[
                            which(which(is.na(bhat.table$N1)) <
                                    max.class.ind.co1)]

    bhat.table$N2.plus <- bhat.table$N1.plus - bhat.table$N1

    bhat.table$N2.plus[which(is.na(bhat.table$N2.plus))] <-
      bhat.table$N1.plus[which(is.na(bhat.table$N2.plus))]


    #for creation of new bhat table
    bhat.table2 <- bhat.table
    bhat.table2$N2.plus[which(bhat.table2$N2.plus < 0)] <- 0
    bhat.table2$N1[which(bhat.table2$N2.plus < 0)] <- bhat.table2$N1.plus

    #creation of new table for continuation
    bhat.table <- data.frame('mean.length.classes' = bhat.table2$mean.length.classes,
                             'N1.plus' = bhat.table2$N2.plus,
                             'log.N1.plus' = NA,
                             'delta.log.N1.plus' = NA,
                             'L' = NA,
                             'delta.log.N' = NA,
                             'log.N1' = NA,
                             'N1' = NA,
                             'N2.plus' = NA)

    #arrange dataframe for saving
    colnames(bhat.table2) <- c('mean.length.classes',paste('N',xy,'.plus',sep=''),
                               paste('log.N',xy,'.plus',sep=''),
                               paste('delta.log.N',xy,'.plus',sep=''),'L',
                               'delta.log.N',paste('log.N',xy,sep=''),
                               paste('N',xy,sep=''),paste('N',xy+1,'.plus',sep=''))

    #save data
    bhat.table.list[[xy+1]] <- bhat.table2
    last_choices <- id.co1$ind
    a.b.list[[xy]] <- c(a.b.list[[xy]],id.co1$ind[1],id.co1$ind[2])
  }

  # Bhatta table
  bhat.table.list <- bhat.table.list[!sapply(bhat.table.list, is.null)]
  bhat.table.list2 <- bhat.table.list[-1]
  bhat.table.list_new <- lapply(bhat.table.list2, function(x)  return(x[,3:9]))
  bhat.results <- do.call(cbind, bhat.table.list_new)
  if(!is.null(bhat.results)){
    ret_pri <- cbind(bhat.table.list[[1]][,1:2], bhat.results)
  }else ret_pri <- bhat.table.list[[1]]

  # Intercept & Slope
  a.b.list <- a.b.list[!sapply(a.b.list,is.null)]
  if(length(a.b.list) > 0){
    a.b.df <- do.call(rbind,a.b.list)
    a.b.df <- cbind(1:length(a.b.df[,1]),a.b.df)
    colnames(a.b.df) <- c("Cohort","Intercept","Slope","Start_point","End_point")
  }else a.b.df = NA

  # Lmean and SD
  l.s.list <- l.s.list[!sapply(l.s.list,is.null)]
  if(length(l.s.list) > 1){
    l.s.df <- do.call(rbind,l.s.list)
    l.s.df <- cbind(1:length(l.s.df[,1]),l.s.df)
    # seperation index (SI)
    SIs <- rep(NA,length(l.s.df[,1]))
    for(i in 1:(length(l.s.df[,1])-1)){
      SIs[i] <- abs(l.s.df[i+1,2]-l.s.df[i,2]) / abs((l.s.df[i+1,3]+l.s.df[i,3])/2)
    }
    l.s.df <- cbind(l.s.df,SIs)
    colnames(l.s.df) <- c("Cohort","Lmean","SD","SI")
  }else l.s.df <- NA

  temp.list <- temp.list[!sapply(temp.list, is.null)]
  coho_plot <- coho_plot[!sapply(coho_plot,is.null)]

  ret <- list(res,
              regression_lines = a.b.df,
              Lmean_SD_list = l.s.df,
              bhat_results = ret_pri,
              distributions = temp.list,
              cohort_plots = coho_plot)
  class(ret) <- "Bhattacharya"
  if(cancel != TRUE){
    plot(ret)
    return(ret)
  }

    }else{
        writeLines("Interactive session needed for Bhattacharya.")
        return(NULL)
    }
}