R/Turing.R

Defines functions plotTSdata plotCAAdata TuringMOM Turing

Documented in Turing TuringMOM

### Fix:
# - nyears != nyears Data
# - missing data in Catch or Index - linearly interpolate?
# - missing years of CAA and CAL
# - max age for CAA
# - number of size bins
# Turing(OM, Data)

# CAA and CAL should match Year - work on Data object
# recommended by Andre Punt

#' Turing Test
#'
#' Plots the available data in the `Data` object together with 5 samples of
#' historical data from the Operating Model (OM) in a random order. The test is
#' used to determine if the data generated by the OM is similar to the fishery
#' data in the `Data` object. In a well specified OM the user should not be able
#' to visually identify which of the 6 plots is the real fishery data and which
#' are generated by the OM.'
#'
#' In its current form the Turing function does not interpolate missing data in the
#' Data object. Therefore if there are years with missing data, say in the catch
#' time-series, it will be obvious which are the real data and which have been
#' generated by the model. Future versions of the function may include methods to
#' impute missing data for plotting purposes.
#'
#' The question to ask when examining the plots produced by `Turing`: do the plots
#' of the 6 data samples look like they are all samples from the same underlying distribution?
#'
#' @param OM An object of class `OM` or class `multiHist`
#' @param Data An object of class `Data` or a nested list of `Data` objects for each stock and fleet
#' @param wait Logical. Wait for key press before next plot?
#'
# #' @templateVar url evaluating-om
# #' @templateVar ref NULL
# #' @template userguide_link
#'
#' @note
#' The Turing function was suggested by Andre Punt in his review of one of our
#' recent projects. It is named after the Turing test, developed by Alan Turing in
#' 1950, which is designed to see if a human can detect the difference between
#' human and machine generated information.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' Turing(MSEtool::testOM, MSEtool::SimulatedData, wait=FALSE)
#' }
#'
Turing <- function(OM, Data, wait=TRUE) {
  if (methods::is(OM, "multiHist"))
    return(TuringMOM (OM, Data, wait))
  if (!methods::is(OM,"OM")) stop("OM must be class 'OM'")
  if (!methods::is(Data,"Data")) stop("Data must be class 'Data'")

  # Generate Historical Data
  message("Simulating Data")
  # if length data exists, make sure length bins are the same
  if (!all(is.na(Data@CAL[1,,]))) {
    CAL_bins <- Data@CAL_bins
    OM@cpars$CAL_bins <- CAL_bins
    if (max(OM@Linf) > max(CAL_bins))
      stop("`OM@Linf` is greater than `max(Data@CAL_bins)`.\nIncrease number of CAL_bins or check if Data object is correct.", call.=FALSE)
  }


  # make sure maxage is the same
  if (all(is.na(Data@MaxAge))) stop("Data@MaxAge is NA. Value is required", call. = FALSE)
  OM@cpars$maxage <- Data@MaxAge

  nsamp <- 5
  OM <- SubCpars(OM, seq_len(nsamp + 2))
  SimDat <- runMSE(OM, Hist=TRUE, silent = TRUE)@Data
  # if(!all(SimDat@CAL_bins == Data@CAL_bins)) stop("CAL_bins not correct length")

  YrInd <- 1:OM@nyears

  if (length(Data@Year) < OM@nyears) {
    message("Note: length Data@Year (", length(Data@Year), ") is < length OM@nyears (", OM@nyears, ") \nUsing last ",
            length(Data@Year), " years of simulations")
    YrInd <- (OM@nyears - length(Data@Year)+1): OM@nyears

  } else if (length(Data@Year) > OM@nyears) {
    stop("OM@nyears is < length(Data@Year). Should OM@nyears be increased to match Data?", call. = FALSE)
  }

  set.seed(as.numeric(Sys.time()))
  samps <- sample(1:OM@nsim, nsamp)

  message("Randomly sampling 5 iterations")

  # ---- Catch Data ----
  plotTSdata(Ylab="St. Catch", slot="Cat", message="Catch Data", Data, SimDat,
             samps, YrInd, wait)

  # ---- Index Data ----
  plotTSdata(Ylab="St. Index", slot="Ind", message="Index Data", Data, SimDat,
             samps, YrInd, wait)

  # ---- Recruitment Data ----
  plotTSdata(Ylab="St. Recruitment", slot="Rec", message="Recruitment Data", Data,
             SimDat, samps, YrInd, wait)

  # ---- Mean Length Data ----
  plotTSdata(Ylab="Mean Length", slot="ML", message="Mean Length Data", Data,
             SimDat, samps, YrInd, wait, standarise=FALSE)

  # ---- Mean Length Data ----
  plotTSdata(Ylab="Lbar", slot="Lbar", message="Lbar Data", Data,
             SimDat, samps, YrInd, wait, standarise=FALSE)

  # ---- Catch-at-Age ----
  plotCAAdata(Ylab="Count", slot="CAA", message="Catch-at-Age Data", Data,
              SimDat, samps, YrInd, wait)

  # ---- Catch-at-Length ----
  plotCAAdata(Ylab="Count", slot="CAL", message="Catch-at-Length Data", Data,
              SimDat, samps, YrInd, wait)
}

#' @describeIn Turing Turing function for multi-stock, multi-fleet MOMs
#' @param multiHist An object of class `multiHist`. The output of `SimulateMOM`
#' @export
TuringMOM <- function(multiHist, Data, wait=TRUE) {

  if (!'multiHist' %in% class(multiHist)) stop("multiHist must be class multiHist`")
  if (!methods::is(Data,'list')) stop("Data must be list")
  np <- length(multiHist)
  nf <- length(multiHist[[1]])

  if (any(ldim(Data) != c(np, nf)))
    stop('Data must be a nested list of length `nstocks` each with `nfleets')
  if (!methods::is(Data[[1]][[1]],'Data'))
    stop('Data is not a nested list of objects of class `Data`')

  nsim <- nrow(multiHist[[1]][[1]]@OMPars)
  nyears <- dim(multiHist[[1]][[1]]@TSdata$Number)[2]

  for (p in 1:np) {
    for (f in 1:nf) {
      if (length(Data[[p]][[f]]@Year) != nyears) {
        stop("Note: length Data@Year (", length(Data[[p]][[f]]@Year), ") for stock ", p,
             " and fleet ", f, " is not equal to the number of historical simulated years (", nyears, ")")
      }
    }
  }

  nsamp <- min(5, nsim)
  set.seed(as.numeric(Sys.time()))
  samps <- sample(1:nsim, nsamp)

  message("Randomly sampling 5 iterations")

  YrInd <- 1:nyears

  for (p in 1:np) {
    for (f in 1:nf) {
      # ---- Catch Data ----
      message <- paste0('Catch Data for Stock ', p, ' and Fleet ', f)
      plotTSdata(Ylab="St. Catch", slot="Cat", message=message, Data[[p]][[f]],
                 multiHist[[p]][[f]]@Data,
                 samps, YrInd, wait)

      # ---- Index Data ----
      message <- paste0('Index Data for Stock ', p, ' and Fleet ', f)
      plotTSdata(Ylab="St. Index", slot="Ind", message=message, Data[[p]][[f]],
                 multiHist[[p]][[f]]@Data,
                 samps, YrInd, wait)

      # ---- Recruitment Data ----
      message <- paste0('Recruitment Data for Stock ', p, ' and Fleet ', f)
      plotTSdata(Ylab="St. Recruitment", slot="Rec", message=message, Data[[p]][[f]],
                 multiHist[[p]][[f]]@Data, samps, YrInd, wait)

      # ---- Mean Length Data ----
      message <- paste0('Mean Length Data for Stock ', p, ' and Fleet ', f)
      plotTSdata(Ylab="Mean Length", slot="ML", message=message, Data[[p]][[f]],
                 multiHist[[p]][[f]]@Data, samps, YrInd, wait, standarise=FALSE)

      # ---- Mean Length Data ----
      message <- paste0('Lbar Data for Stock ', p, ' and Fleet ', f)
      plotTSdata(Ylab="Lbar", slot="Lbar", message=message, Data[[p]][[f]],
                 multiHist[[p]][[f]]@Data, samps, YrInd, wait, standarise=FALSE)

      # ---- Catch-at-Age ----
      message <- paste0('Catch-at-Age Data for Stock ', p, ' and Fleet ', f)
      plotCAAdata(Ylab="Count", slot="CAA", message=message, Data[[p]][[f]],
                  multiHist[[p]][[f]]@Data, samps, YrInd, wait)

      # ---- Catch-at-Length ----
      message <- paste0('Catch-at-Length Data for Stock ', p, ' and Fleet ', f)
      plotCAAdata(Ylab="Count", slot="CAL", message=message, Data[[p]][[f]],
                  multiHist[[p]][[f]]@Data, samps, YrInd, wait)
    }
  }
}

plotCAAdata <- function(Ylab="Count", slot="CAA", message="Catch-at-Age Data",
                        Data, SimDat, samps, YrInd, wait) {
  Year <- Val <- Freq <- Sim <- NULL # cran check hacks

  realDat <- slot(Data, slot)[1,,]
  sampDat <- slot(SimDat, slot)[samps,YrInd,]
  nsamp <- length(samps)
  if (all(is.na(realDat))) {
    message("No ", message, " found in Data object")
    return()
  }
  # plot last 4 years
  nyr <- nrow(realDat)
  realDat <- realDat[(nyr-3):nyr,]
  sampDat <- sampDat[,(nyr-3):nyr,]

  zeros <- dim(sampDat)[3]-dim(realDat)[2]
  if (zeros>0) {
    zeromat <- matrix(0, nrow=nrow(realDat), ncol=zeros)
    if (slot=='CAA') {
      message('The number of columns in `CAA` is less than `Data@MaxAge+1`. Filling with 0s')
      realDat <- cbind(realDat, zeromat)
    }
  }

  if (!all(is.na(realDat))) {
    message("Plotting: ", message)

    if (slot == "CAA") {
      # check maximum age
      if(Data@MaxAge != SimDat@MaxAge)
        stop('Maximum age is different in Simulated and Real Data. Simulated Data has a maximum age of ', SimDat@MaxAge,
             ' but Real Data has maximum age of ', Data@MaxAge)

      nyrs <- nrow(realDat); n_age <- ncol(realDat)
      dimnames(realDat) <- list(1:nyrs, 1:n_age)

      df1 <- as.data.frame.table(realDat, stringsAsFactors = FALSE)
      colnames(df1) <- c("Year", "Val", "Freq")

      dimnames(sampDat) <- list(1:nsamp, 1:nyrs, 1:n_age)
      df2 <- as.data.frame.table(sampDat, stringsAsFactors = FALSE)
      colnames(df2) <- c("Sim", "Year", "Val", "Freq")

      Xlab <- "Age"
    } else if (slot == "CAL") {
      nyrs <- nrow(realDat); nbins <- length(Data@CAL_bins) - 1
      By <- Data@CAL_bins[2] - Data@CAL_bins[1]
      BinsMid <- seq(Data@CAL_bins[1] + 0.5*By, by=By,length.out = nbins)
      dimnames(realDat) <- list(1:nyrs, BinsMid)

      if (any(dim(realDat) != dim(sampDat[1,,]))) {
        warning('Dimensions of Simulated CAL Data and Real CAL Data are not the same. You may need to add CAL_bins to cpars. Skipping this plot...')
        return()
      }

      df1 <- as.data.frame.table(realDat, stringsAsFactors = FALSE)
      colnames(df1) <- c("Year", "Val", "Freq")

      dimnames(sampDat) <- list(1:nsamp, 1:nyrs, BinsMid)
      df2 <- as.data.frame.table(sampDat, stringsAsFactors = FALSE)
      colnames(df2) <- c("Sim", "Year", "Val", "Freq")

      Xlab <- "Length"
    }


    df1$Sim <- as.character(dim(sampDat)[1]+1)
    df3 <- dplyr::bind_rows(df1, df2)

    df3$Sim <- as.factor(df3$Sim)
    levels(df3$Sim) <- sample(1:6, 6) # randomise
    df3$Sim <- factor(df3$Sim, levels=1:(nsamp+1), ordered = TRUE)

    df3$Val <- as.numeric(df3$Val)
    df3$Year <- as.numeric(df3$Year)
    df3$Freq <- as.numeric(df3$Freq)

    realInd <- unique(df3$Sim[unique(match(df3$Freq, df1$Freq))])
    realInd <- as.numeric(as.character(realInd[!is.na(realInd)] ))

    Years <- unique(df3$Year)
    SampYears <- (max(Years)-3):max(Years)

    df4 <- df3 %>% dplyr::filter(Year %in% SampYears)

    # filter empty bins
    Bcount <- df4 %>% dplyr::group_by(Val) %>% dplyr::summarize(count=sum(Freq>0))
    indmin <- min(which(Bcount$count >0))
    indmax <- max(which(Bcount$count >0))
    Bmin <- Bcount$Val[max(indmin-1, 1)]
    Bmax <- Bcount$Val[min(indmax+1, length(Bcount$Val))]
    df4 <- df4 %>% dplyr::filter(Val >= Bmin & Val <= Bmax)

    nrow <- length(unique(df4$Sim))
    ncol <- length(unique(df4$Year))
    nplot <- nrow*ncol

    pmat <- matrix(1:nplot, nrow=nrow, ncol=ncol, byrow=TRUE)

    op <- par(mfrow=c(nrow, ncol), no.readonly = TRUE, mar=c(2,2,1,1), oma=c(4,4,2,0))
    on.exit(par(op))

    ylim <- c(0, max(df4$Freq))

    for (r in 1:nrow) {
      col <- "grey"
      for (c in 1:ncol) {
        dat <- df4 %>% dplyr::filter(Sim==r, Year == c)
        if (r == nrow) {
          barplot(dat$Freq, names=round(dat$Val, 2), axes=FALSE, col=col, ylim=ylim)
        } else {
          barplot(dat$Freq, axes=FALSE, col=col, ylim=ylim)
        }
        if (c == 1) axis(side=2)
        if (c != 1) axis(side=2, labels=FALSE)
      }
    }
    mtext(side=1, outer=TRUE, Xlab, line=2, cex=1.5)
    mtext(side=2, outer=TRUE, Ylab, line=2, cex=1.5)
    title(message, outer=TRUE, cex=1.5)

    if (wait) tt <- readline(paste0("Press [enter] to show real ", message, "..."))

    for (r in 1:nrow) {
      col <- ifelse(r==realInd, "blue", "grey")
      for (c in 1:ncol) {
        dat <- df4 %>% dplyr::filter(Sim==r, Year == c)
        if (r == nrow) {
          barplot(dat$Freq, names=round(dat$Val, 2), axes=FALSE, col=col, ylim=ylim)
        } else {
          barplot(dat$Freq, axes=FALSE, col=col, ylim=ylim)
        }
        if (c == 1) axis(side=2)
        if (c != 1) axis(side=2, labels=FALSE)
      }
    }
    mtext(side=1, outer=TRUE, Xlab, line=2, cex=1.5)
    mtext(side=2, outer=TRUE, Ylab, line=2, cex=1.5)
    title(message, outer=TRUE, cex=1.5)

    if (wait) tt <- readline("Press [enter] to continue...")

    #
    # P1 <- ggplot2::ggplot(df4, ggplot2::aes(x=Val, y=Freq)) + ggplot2::geom_bar(stat="identity") +
    #   ggplot2::facet_grid(Sim~Year) + ggplot2::theme_classic() +
    #   ggplot2::labs(x=Xlab, y=Ylab) + ggplot2::ggtitle(message)
    #
    # df5 <- df4 %>% dplyr::filter(Sim == realInd)
    # P2 <- P1 + ggplot2::geom_bar(fill="blue", data=df5, stat="identity")
    # print(P1)
    # if (wait) tt <- readline(paste0("Press [enter] to show real ", message, "..."))
    # print(P2)
    # if (wait) tt <- readline("Press [enter] to continue...")

  } else {
    message("No ", message, " found in Data object")
  }

}


plotTSdata <- function(Ylab, slot, message, Data, SimDat, samps, YrInd,
                       wait=TRUE, standarise=TRUE, lwd=2) {
  Year <- Val <- Freq <- Sim <- value <- key <- NULL # cran check hacks
  realDat <- slot(Data, slot)[1,]
  # remove NAs
  YrInd2 <- YrInd[is.finite(realDat)]
  YrInd3 <- Data@Year[is.finite(realDat)]
  realDat <- realDat[is.finite(realDat)]
  sampDat <- slot(SimDat, slot)[samps,YrInd2]

  if (!all(is.na(realDat))) {
    message("Plotting: ", message)
    CombDat <- rbind(sampDat, realDat)
    CombDat <- CombDat[sample(1:nrow(CombDat)),]
    realInd <- which(match(data.frame(t(CombDat)), data.frame(realDat)) == 1)
    if (standarise) CombDat <- CombDat/matrix(apply(CombDat, 1, mean, na.rm=TRUE),
                                              nrow=nrow(CombDat), ncol=ncol(CombDat))
    rownames(CombDat) <- rep("", nrow(CombDat))
    tCombDat <- as.data.frame(t(CombDat))

    nplot <- ncol(tCombDat)
    nrow <- floor(sqrt(nplot))
    ncol <- nplot/nrow

    pmat <- matrix(1:nplot, nrow=nrow, ncol=ncol, byrow=TRUE)

    op <- par(mfrow=c(nrow, ncol), no.readonly = TRUE, mar=c(2,2,1,1), oma=c(4,4,2,0))
    on.exit(par(op))

    ylim <- range(tCombDat)
    for (r in 1:nplot) {
      plot(YrInd3, tCombDat[,r], type="l", axes=FALSE, xlab="", ylab="", lwd=lwd,
           ylim=ylim)
      if (r %in% pmat[1,]) axis(side=1, labels=FALSE)
      if (r %in% pmat[2,]) axis(side=1, labels=TRUE)
      if (r %in% pmat[,1]) {
        axis(side=2, labels=TRUE)
      } else {
        axis(side=2, labels=FALSE)
      }
    }
    mtext(side=1, outer=TRUE, "Year", line=2, cex=1.5)
    mtext(side=2, outer=TRUE, Ylab, line=2, cex=1.5)
    title(message, outer=TRUE, cex=1.5)

    if (wait) tt <- readline(paste0("Press [enter] to show real ", message, "..."))

    for (r in 1:nplot) {
      lcol <- ifelse(r==realInd, "blue", "black")
      plot(YrInd3, tCombDat[,r], type="l", axes=FALSE, xlab="", ylab="", lwd=lwd,
           col=lcol, ylim=ylim)
      if (r %in% pmat[1,]) axis(side=1, labels=FALSE)
      if (r %in% pmat[2,]) axis(side=1, labels=TRUE)
      if (r %in% pmat[,1]) {
        axis(side=2, labels=TRUE)
      } else {
        axis(side=2, labels=FALSE)
      }
    }
    mtext(side=1, outer=TRUE, "Year", line=2, cex=1.5)
    mtext(side=2, outer=TRUE, Ylab, line=2, cex=1.5)
    title(message, outer=TRUE, cex=1.5)

    if (wait) tt <- readline("Press [enter] to continue...")


    # df <- tidyr::gather(tCombDat, factor_key=TRUE)
    # df$Year <- factor(rep(YrInd3, nrow(CombDat)))
    #
    # P1 <- ggplot2::ggplot(df, ggplot2::aes(x=Year, y=value, group=1)) + ggplot2::geom_line(size=1.2) +
    #   ggplot2::facet_wrap(~key, nrow=2) + ggplot2::theme_classic() +
    #   ggplot2::labs(x="Year", y=Ylab) + ggplot2::ggtitle(message) +
    #   ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 1))
    #
    #
    # df2 <- df %>% dplyr::filter(key == paste0("V", realInd))
    # P2 <- P1 + ggplot2::geom_line(size=1.2, data=df2, color='blue')
    # print(P1)
    # if (wait) tt <- readline(paste0("Press any key to show real ", message, "..."))
    # print(P2)
    # if (wait) tt <- readline("Press any key to continue...")

  } else {
    message("No ", message, " found in Data object")
  }
}

Try the MSEtool package in your browser

Any scripts or data that you put into this service are public.

MSEtool documentation built on July 26, 2023, 5:21 p.m.