R/Turing.R

Defines functions plotTSdata plotCAAdata Turing

Documented in Turing

### Fix:
# - nyears != nyears Data
# - missing data in Catch or Index - linearly interpolate?
# - missing years of CAA and CAL
# - max age for CAA 
# - number of size bins 
# Turing(OM, Data)

# CAA and CAL should match Year - work on Data object
# recommended by Andre Punt 

#' Turing Test
#' 
#' Plots the available data in the `Data` object together with 5 samples of
#' historical data from the Operating Model (OM) in a random order. The test is 
#' used to determine if the data generated by the OM is similiar to the fishery 
#' data in the `Data` object. In a well specified OM the user should not be able 
#' to visually identify which of the 6 plots is the real fishery data and which 
#' are generated by the OM.' 
#' 
#' In its current form the Turing function does not interpolate missing data in the 
#' Data object. Therefore if there are years with missing data, say in the catch 
#' time-series, it will be obvious which are the real data and which have been 
#' generated by the model. Future versions of the function may include methods to
#' impute missing data for plotting purposes. 
#' 
#' The question to ask when examining the plots produced by `Turing`: do the plots 
#' of the 6 data samples look like they are all samples from the same underlying distribution?
#'   
#'
#' @param OM An object of class `OM`
#' @param Data An object of class `Data`
#' @param wait Logical. Wait for key press before next plot?
#'
#' @templateVar url evaluating-om
#' @templateVar ref NULL
#' @template userguide_link
#' 
#' @note 
#' The Turing function was suggested by Andre Punt in his review of one of our 
#' recent projects. It is named after the Turing test, developed by Alan Turing in 
#' 1950, which is designed to see if a human can detect the difference between 
#' human and machine generated information. 
#' 
#' @export
#'
#' @examples
#' \dontrun{
#' Turing(DLMtool::testOM, DLMtool::SimulatedData, wait=FALSE)
#' }
#' 
Turing <- function(OM, Data, wait=TRUE) {
  if (class(OM) != "OM") stop("OM must be class 'OM'")
  if (class(Data) != "Data") stop("Data must be class 'Data'")

  # Generate Historical Data 
  message("Simulating Data")
  # if length data exists, make sure length bins are the same
  if (!all(is.na(Data@CAL[1,,]))) { 
    CAL_bins <- Data@CAL_bins
    OM@cpars$CAL_bins <- CAL_bins
    if (max(OM@Linf) > max(CAL_bins)) 
      stop("`OM@Linf` is greater than `max(Data@CAL_bins)`.\nIncrease number of CAL_bins or check if Data object is correct.", call.=FALSE)
  }
  
  
  # make sure maxage is the same
  if (all(is.na(Data@MaxAge))) stop("Data@MaxAge is NA. Value is required", call. = FALSE)
  OM@cpars$maxage <- Data@MaxAge
  
  nsamp <- 5
  OM@nsim <- nsamp +2 
  SimDat <- runMSE(OM, Hist=TRUE, silent = TRUE)@Data
  # if(!all(SimDat@CAL_bins == Data@CAL_bins)) stop("CAL_bins not correct length")
  
  YrInd <- 1:OM@nyears
  
  if (length(Data@Year) < OM@nyears) {
    message("Note: length Data@Year (", length(Data@Year), ") is < length OM@nyears (", OM@nyears, ") \nUsing last ",
            length(Data@Year), " years of simulations")
    YrInd <- (OM@nyears - length(Data@Year)+1): OM@nyears

  } else if (length(Data@Year) > OM@nyears) {
    stop("OM@nyears is < length(Data@Year). Should OM@nyears be increased to match Data?", call. = FALSE) 
  }
  
  set.seed(as.numeric(Sys.time()))
  samps <- sample(1:OM@nsim, nsamp)

  message("Randomly sampling 5 iterations")
  
  # ---- Catch Data ---- 
  plotTSdata(Ylab="St. Catch", slot="Cat", message="Catch Data", Data, SimDat, 
             samps, YrInd, wait)
  
  # ---- Index Data ---- 
  plotTSdata(Ylab="St. Index", slot="Ind", message="Index Data", Data, SimDat, 
             samps, YrInd, wait)
  
  # ---- Recruitment Data ----
  plotTSdata(Ylab="St. Recruitment", slot="Rec", message="Recruitment Data", Data, 
             SimDat, samps, YrInd, wait)
  
  # ---- Mean Length Data ----
  plotTSdata(Ylab="Mean Length", slot="ML", message="Mean Length Data", Data, 
             SimDat, samps, YrInd, wait, standarise=FALSE)
  
  # ---- Mean Length Data ----
  plotTSdata(Ylab="Lbar", slot="Lbar", message="Lbar Data", Data, 
             SimDat, samps, YrInd, wait, standarise=FALSE)
  
  # ---- Catch-at-Age ----
  plotCAAdata(Ylab="Count", slot="CAA", message="Catch-at-Age Data", Data, 
              SimDat, samps, YrInd, wait)
  
  # ---- Catch-at-Length ----
  plotCAAdata(Ylab="Count", slot="CAL", message="Catch-at-Length Data", Data, 
              SimDat, samps, YrInd, wait)
}

    
plotCAAdata <- function(Ylab="Count", slot="CAA", message="Catch-at-Age Data", 
                        Data, SimDat, samps, YrInd, wait) {
  Year <- Val <- Freq <- Sim <- NULL # cran check hacks
  
  realDat <- slot(Data, slot)[1,,]
  sampDat <- slot(SimDat, slot)[samps,YrInd,]
  nsamp <- length(samps)
  if (all(is.na(realDat))) {
    message("No ", message, " found in Data object")
    return()
  }
  # plot last 4 years
  nyr <- nrow(realDat)
  realDat <- realDat[(nyr-3):nyr,]
  sampDat <- sampDat[,(nyr-3):nyr,]
  
  zeros <- dim(sampDat)[3]-dim(realDat)[2]
  if (zeros>0) {
    zeromat <- matrix(0, nrow=nrow(realDat), ncol=zeros)
    message('The number of columns in `CAA` is less than `Data@MaxAge`. Filling with 0s')
    realDat <- cbind(realDat, zeromat)
  }
 
  if (!all(is.na(realDat))) {
    message("Plotting: ", message)
   
    if (slot == "CAA") {
      nyrs <- nrow(realDat); maxage <- ncol(realDat)
      dimnames(realDat) <- list(1:nyrs, 1:maxage)
      
      df1 <- as.data.frame.table(realDat, stringsAsFactors = FALSE)
      colnames(df1) <- c("Year", "Val", "Freq")
      
      dimnames(sampDat) <- list(1:nsamp, 1:nyrs, 1:maxage)
      df2 <- as.data.frame.table(sampDat, stringsAsFactors = FALSE)
      colnames(df2) <- c("Sim", "Year", "Val", "Freq")
      
      Xlab <- "Age"
    } else if (slot == "CAL") {
      nyrs <- nrow(realDat); nbins <- length(Data@CAL_bins) - 1
      By <- Data@CAL_bins[2] - Data@CAL_bins[1]
      BinsMid <- seq(Data@CAL_bins[1] + 0.5*By, by=By,length.out = nbins)
      dimnames(realDat) <- list(1:nyrs, BinsMid)
      
      df1 <- as.data.frame.table(realDat, stringsAsFactors = FALSE)
      colnames(df1) <- c("Year", "Val", "Freq")
      
      dimnames(sampDat) <- list(1:nsamp, 1:nyrs, BinsMid)
      df2 <- as.data.frame.table(sampDat, stringsAsFactors = FALSE)
      colnames(df2) <- c("Sim", "Year", "Val", "Freq")
      
      Xlab <- "Length"
    }

    
    df1$Sim <- as.character(dim(sampDat)[1]+1)
    df3 <- dplyr::bind_rows(df1, df2)
    
    df3$Sim <- as.factor(df3$Sim)
    levels(df3$Sim) <- sample(1:6, 6) # randomise
    df3$Sim <- factor(df3$Sim, levels=1:(nsamp+1), ordered = TRUE)
    
    df3$Val <- as.numeric(df3$Val)
    df3$Year <- as.numeric(df3$Year)
    df3$Freq <- as.numeric(df3$Freq)

    realInd <- unique(df3$Sim[unique(match(df3$Freq, df1$Freq))])
    realInd <- as.numeric(as.character(realInd[!is.na(realInd)] ))

    Years <- unique(df3$Year)
    SampYears <- (max(Years)-3):max(Years)
    
    df4 <- df3 %>% dplyr::filter(Year %in% SampYears)
    
    # filter empty bins
    Bcount <- df4 %>% dplyr::group_by(Val) %>% dplyr::summarize(count=sum(Freq>0))
    indmin <- min(which(Bcount$count >0))
    indmax <- max(which(Bcount$count >0))
    Bmin <- Bcount$Val[max(indmin-1, 1)]
    Bmax <- Bcount$Val[min(indmax+1, length(Bcount$Val))]
    df4 <- df4 %>% dplyr::filter(Val >= Bmin & Val <= Bmax)
    
    nrow <- length(unique(df4$Sim))
    ncol <- length(unique(df4$Year))
    nplot <- nrow*ncol
    
    pmat <- matrix(1:nplot, nrow=nrow, ncol=ncol, byrow=TRUE)
    
    op <- par(mfrow=c(nrow, ncol), no.readonly = TRUE, mar=c(2,2,1,1), oma=c(4,4,2,0))
    on.exit(par(op))
    
    ylim <- c(0, max(df4$Freq))
    
    for (r in 1:nrow) {
      col <- "grey"
      for (c in 1:ncol) {
        dat <- df4 %>% dplyr::filter(Sim==r, Year == c)
        if (r == nrow) {
          barplot(dat$Freq, names=round(dat$Val, 2), axes=FALSE, col=col, ylim=ylim)    
        } else {
          barplot(dat$Freq, axes=FALSE, col=col, ylim=ylim)    
        }
        if (c == 1) axis(side=2)
        if (c != 1) axis(side=2, labels=FALSE)
      }
    }
    mtext(side=1, outer=TRUE, Xlab, line=2, cex=1.5)
    mtext(side=2, outer=TRUE, Ylab, line=2, cex=1.5)
    title(message, outer=TRUE, cex=1.5)
   
    if (wait) tt <- readline(paste0("Press [enter] to show real ", message, "..."))
 
    for (r in 1:nrow) {
      col <- ifelse(r==realInd, "blue", "grey")
      for (c in 1:ncol) {
        dat <- df4 %>% dplyr::filter(Sim==r, Year == c)
        if (r == nrow) {
          barplot(dat$Freq, names=round(dat$Val, 2), axes=FALSE, col=col, ylim=ylim)    
        } else {
          barplot(dat$Freq, axes=FALSE, col=col, ylim=ylim)    
        }
        if (c == 1) axis(side=2)
        if (c != 1) axis(side=2, labels=FALSE)
      }
    }
    mtext(side=1, outer=TRUE, Xlab, line=2, cex=1.5)
    mtext(side=2, outer=TRUE, Ylab, line=2, cex=1.5)
    title(message, outer=TRUE, cex=1.5)
    
    if (wait) tt <- readline("Press [enter] to continue...")
    
    # 
    # P1 <- ggplot2::ggplot(df4, ggplot2::aes(x=Val, y=Freq)) + ggplot2::geom_bar(stat="identity") +
    #   ggplot2::facet_grid(Sim~Year) + ggplot2::theme_classic() +
    #   ggplot2::labs(x=Xlab, y=Ylab) + ggplot2::ggtitle(message)
    # 
    # df5 <- df4 %>% dplyr::filter(Sim == realInd)
    # P2 <- P1 + ggplot2::geom_bar(fill="blue", data=df5, stat="identity")
    # print(P1)
    # if (wait) tt <- readline(paste0("Press [enter] to show real ", message, "..."))
    # print(P2)
    # if (wait) tt <- readline("Press [enter] to continue...")
  
  } else {
    message("No ", message, " found in Data object")
  }
  
}


plotTSdata <- function(Ylab, slot, message, Data, SimDat, samps, YrInd, 
                       wait=TRUE, standarise=TRUE, lwd=2) {
  Year <- Val <- Freq <- Sim <- value <- key <- NULL # cran check hacks
  realDat <- slot(Data, slot)[1,]
  # remove NAs
  YrInd2 <- YrInd[is.finite(realDat)] 
  YrInd3 <- Data@Year[is.finite(realDat)]
  realDat <- realDat[is.finite(realDat)]
  sampDat <- slot(SimDat, slot)[samps,YrInd2]
  
  if (!all(is.na(realDat))) {
    message("Plotting: ", message)
    CombDat <- rbind(sampDat, realDat)
    CombDat <- CombDat[sample(1:nrow(CombDat)),]
    realInd <- which(match(data.frame(t(CombDat)), data.frame(realDat)) == 1)
    if (standarise) CombDat <- CombDat/matrix(apply(CombDat, 1, mean, na.rm=TRUE), 
                                              nrow=nrow(CombDat), ncol=ncol(CombDat))
    rownames(CombDat) <- rep("", nrow(CombDat))
    tCombDat <- as.data.frame(t(CombDat))
    
    nplot <- ncol(tCombDat)
    nrow <- floor(sqrt(nplot))
    ncol <- nplot/nrow
    
    pmat <- matrix(1:nplot, nrow=nrow, ncol=ncol, byrow=TRUE)
    
    op <- par(mfrow=c(nrow, ncol), no.readonly = TRUE, mar=c(2,2,1,1), oma=c(4,4,2,0))
    on.exit(par(op))
    
    ylim <- range(tCombDat)
    for (r in 1:nplot) {
      plot(YrInd3, tCombDat[,r], type="l", axes=FALSE, xlab="", ylab="", lwd=lwd, 
           ylim=ylim)
      if (r %in% pmat[1,]) axis(side=1, labels=FALSE)
      if (r %in% pmat[2,]) axis(side=1, labels=TRUE)
      if (r %in% pmat[,1]) {
        axis(side=2, labels=TRUE)
      } else {
        axis(side=2, labels=FALSE)
      }
    }
    mtext(side=1, outer=TRUE, "Year", line=2, cex=1.5)
    mtext(side=2, outer=TRUE, Ylab, line=2, cex=1.5)
    title(message, outer=TRUE, cex=1.5)
    
    if (wait) tt <- readline(paste0("Press [enter] to show real ", message, "..."))
    
    for (r in 1:nplot) {
      lcol <- ifelse(r==realInd, "blue", "black")
      plot(YrInd3, tCombDat[,r], type="l", axes=FALSE, xlab="", ylab="", lwd=lwd, 
           col=lcol, ylim=ylim)
      if (r %in% pmat[1,]) axis(side=1, labels=FALSE)
      if (r %in% pmat[2,]) axis(side=1, labels=TRUE)
      if (r %in% pmat[,1]) {
        axis(side=2, labels=TRUE)
      } else {
        axis(side=2, labels=FALSE)
      }
    }
    mtext(side=1, outer=TRUE, "Year", line=2, cex=1.5)
    mtext(side=2, outer=TRUE, Ylab, line=2, cex=1.5)
    title(message, outer=TRUE, cex=1.5)
    
    if (wait) tt <- readline("Press [enter] to continue...")

   
    # df <- tidyr::gather(tCombDat, factor_key=TRUE)
    # df$Year <- factor(rep(YrInd3, nrow(CombDat)))
    # 
    # P1 <- ggplot2::ggplot(df, ggplot2::aes(x=Year, y=value, group=1)) + ggplot2::geom_line(size=1.2) +
    #   ggplot2::facet_wrap(~key, nrow=2) + ggplot2::theme_classic() +
    #   ggplot2::labs(x="Year", y=Ylab) + ggplot2::ggtitle(message) +
    #   ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 1)) 
    # 
    # 
    # df2 <- df %>% dplyr::filter(key == paste0("V", realInd))
    # P2 <- P1 + ggplot2::geom_line(size=1.2, data=df2, color='blue')
    # print(P1)
    # if (wait) tt <- readline(paste0("Press any key to show real ", message, "..."))
    # print(P2)
    # if (wait) tt <- readline("Press any key to continue...")
    
  } else {
    message("No ", message, " found in Data object")
  }
}
 
  
DLMtool/DLMtool documentation built on June 20, 2021, 5:20 p.m.