iec: Index of Ecological Condition (IEC)

# functions for generating  IEC plots

# Authors: Nicholas G. Walton and Robert W. Howe
# created: 13 Oct 2014
# Last modified: 25 Sept 2015



## LOF plot ----

#' Plot BRC lack-of-fit (LOF).
#'
#' \code{plot_lof} plots the LOF of for each taxon in a BRC data frame.
#'
#' The LOF for each taxon in a BRC data frame (generated by
#' \code{\link{est_brc}}) is plotted on a single frame by index.  This can be
#' helpful for spotting taxa with large LOF relative to other taxa.  This is
#' intended as a heuristic plot only.
#'
#' @param min_lof numeric value indicating minimum LOF to label.
#' @inheritParams est_iec
#' @seealso \code{\link{est_brc}}
plot_lof <- function(brc, min_lof = 1) {
  # brc is a data frame containing Bioric Response Curves generated by
  # function est_brc.
  # min_lof is minimum value to label LOF values.  Values >= min_lof will be
  # labeled with their taxa in the output figure.
  #
  # Plot the lack of fit for all species on a single figure.
  # This plot is useful for spotting species with exceptionally poor LOF
  # relative to other the species.
  plot(brc$LOF, main = "Lack of Fit for all species",
       xlab = "Index (record number)", ylab = "LOF")

  for (i in 1:nrow(brc)) {
    # Label all species with a LOF greater than 1
    if (brc$LOF[i] >= min_lof) {
      text(i, brc$LOF[i], brc$Taxon[i], cex = 0.8, pos = 4)
    }
  }
}


## plot BRC with observations ----

#' Plot BRC
#'
#' \code{plot_brc} plots biotic response curves generated by
#' \code{\link{est_brc}} along with the observations used to generate them.
#'
#' BRC plots are generated for each taxa in data frames \code{sp} and
#' \code{brc}.  \code{sp} and \code{brc} must contain the same taxa in the same
#' order.  Generally, \code{sp} and \code{ref_grad} will be the same variables
#' used to generate \code{brc} with \code{\link{est_brc}}.  Because many plots
#' will be generated by this function, it may be best to send it to a file
#' plotting device (e.g., \code{\link[grDevices]{pdf}}).
#'
#' The optional arguments, \code{env_bad} and \code{env_good}, provide a way to
#' label each end of the x-axis with the worst and best observed values of the
#' original environmental gradient. This can be helpful while exploring the
#' data to see if a gradient has been scaled correctly (inverted or not).
#' \code{sub_xlab} can be used to add the original name of the gradient to the
#' x-axis below the x-axis label. \code{sub_main} provides an optional sub title
#' printed below the main title (name of taxon extracted from input \code{sp}).
#' This can be a good place to print additional information about the plot such
#' at the project or input dataset.
#'
#' @param env_bad numeric scalar worst condition of original environmental
#'   gradient (optional).
#' @param env_good numeric scalar best condition of original environmental
#'   gradient (optional).
#' @param ylab a title for the y axis (optional; defaults to "Response").
#' @param sub_xlab name of environmental gradient (optional);
#'   sub title to x-axis.
#' @param sub_main a sub title for the plots (optional); printed below main
#'   title (taxon name).
#' @inheritParams est_iec
#' @inheritParams est_brc
#' @seealso \code{\link{est_brc}}, \code{\link{scale10}},
#'   \code{\link[grDevices]{Devices}}
plot_brc <- function(sp, brc, ref_grad, env_bad = NULL, env_good = NULL,
                     ylab = "Response", sub_xlab = NULL, sub_main = NULL) {
  # Generate scatter plot and predicted normal curve for each species.
  # One page per species.
  for (species in 1:ncol(sp)) {
    with(brc[species, ], {
      # "with" makes the following variables from data frame "brc" available:
      # Taxon, LOF, Mean, SD, H, LOF, R2, Sens, n

      # Choose location of figure text (top right or top left side
      # of figure).
      placement <- ifelse(dnorm(0, Mean, SD) * H > 0.5 * max(sp[, species]),
                          'topright', 'topleft')

      # Generate scatter plot and predicted normal curve for current species.
      # x axis label for plot.
      xlab <- expression(Environmental ~ Condition ~ (italic(C[env])))
      # xlab <- "Environmental (Reference) Condition" # Bob's xlab
      y_max <- max(sp[, species]) + 0.1   # needed so we can set ylim min to 0
      if (y_max <= 1) y_max <- 1           # so that flat BRCs are flat
      plot(sp[, species] ~ ref_grad, main = names(sp)[species], sub = sub_xlab,
           cex.sub = 0.8, xlab = xlab, ylab = ylab,
           xlim = c(0, 10), ylim = c(0, y_max))
      curve(dnorm(x, Mean, SD) * H, add = TRUE)

      # correlation between species and reference grad
      r <- cor(sp[, species], ref_grad)

      # root mean square error
      model_error <- sp[, species] - dnorm(ref_grad,Mean,SD)*H
      # move rmse function to misc_functions.R if used more than once
      model_rmse <- sqrt(mean(model_error^2))

      # Add text to plot.
      mtext(sub_main, side = 3, line = 0.4, cex = 0.7)
      mtext(env_bad, side = 1, line = 2, adj = 0.03, cex = 0.8)
      mtext(env_good, side = 1, line = 2, adj = 0.97, cex = 0.8)

      legend(placement,
             legend = c(paste("LOF:", signif(LOF, 3)),
                        paste("RMSE:", signif(model_rmse, 3)),
                        paste("r:", signif(r, 2)),
                        as.expression(bquote(mu: ~ .(signif(Mean, 3)))),
                        as.expression(bquote(sigma: ~ .(signif(SD, 3)))),
                        paste("H:", signif(H, 3))),
             bty = "n", cex = 0.6)
    })
  }
}


## print BRC to pdf----

#' Save BRC plots to PDF file.
#'
#' \code{brc_pdf} saves the output from \code{\link{plot_lof}} and
#' \code{\link{plot_brc}} to a PDF.
#'
#' \code{brc_pdf} checks for if the output file already exists and returns an
#' error is it does.  Otherwise, \code{brc_pdf} opens a PDF device to print to
#' (\code{out_pdf}) and then saves the output from \code{\link{plot_lof}} and
#' \code{\link{plot_brc}} to the PDF file.  This is done within
#' \code{\link[base]{tryCatch}} such that \code{\link[grDevices]{dev.off}} is
#' safely called even if the printing fails.
#'
#' @param out_pdf name of output PDF file.
#' @inheritParams plot_lof
#' @inheritParams plot_brc
#' @seealso \code{\link{est_brc}}, \code{\link{scale10}},
#'   \code{\link[grDevices]{Devices}}
brc_pdf <- function(out_pdf, sp, brc, ref_grad, min_lof = 1,
                    env_bad = NULL, env_good = NULL, ylab = "Response",
                    sub_xlab = NULL, sub_main = NULL) {
  # check/fix extension
  if (!grepl("\\.pdf$", out_pdf, ignore.case = TRUE)) {
    out_pdf <- paste(out_pdf, ".pdf", sep = "")
  }

  # Unless the output figure file already exists, plot the figures.
  if (file.exists(out_pdf)) {
    # "out_pdf" is set in the "Declarations" section of this script.
    # If the output "out_pdf" already exists, print the following
    # error message:
    message(paste("Output figure file \"", out_pdf,
                  "\" already exists...\nNo figure file written.", sep = ""))
  } else {
    # Open a pdf file to write the figures to - this will be written to the
    # current working directory.
    pdf(file = out_pdf)

    tryCatch({
      plot_lof(brc, min_lof)
      plot_brc(sp, brc, ref_grad, env_bad, env_good, ylab, sub_xlab, sub_main)

      # Print message confirming that figures were written.
      message(paste("Figures written to file \"", out_pdf, "\"", sep = ""))

    }, finally = {
      # Close the pdf.
      dev.off()
    })
  }
}


## plot response probability curve for sites ----

#' Plot response surface for sites.
#'
#' \code{plot_resp} plots the response surface for the IEC score at each site in
#' data frame \code{sp}.
#'
#' Plots are generated for each site in \code{sp} with probability on the y-axis
#' and IEC score on the x-axis. The "best" value found by \code{\link{est_iec}}
#' is also plotted as a vertical line.  Note that \code{sp}, \code{brc}, and
#' \code{method} must be the same as those used to generate \code{iec} using
#' \code{\link{est_iec}}. Also, note that this function can be quite time
#' consuming. It is generally best to save the output to an external plotting
#' device (e.g., \code{\link[grDevices]{pdf}})
#'
#' @param iec the output data frame from \code{\link{est_iec}}.
#' @inheritParams est_iec
#' @seealso \code{\link{est_iec}}, \code{\link[grDevices]{Devices}}
plot_resp <- function (sp, iec, brc, method = "pa") {
  # this needs to be rewritten with externalized method selection and f

  # Set method/criteria for estimating IEC
  # Return function for quant or pa based on "method"
  # Method must be set to "q" or "pa"
  if (method == "q") {
    criteria <- function(pc, observed) {
      # Least-squares method (AKA "q").
      # Note that currently the output from this will have the
      # opposite sign of that from the Excel spread sheet.
      # This needs to be fixed, but will also require modifications to
      # "pa".
      numerator <- (observed - pc) ^ 2
      sum(numerator / pc)   # (Obs-Exp)^2 / Exp
    }
  } else {
    criteria <- function(pc, observed) {
      # Likelihood method (AKA "pa").
      # Calculate lack-of-fit expression ("lof") for each species.
      lof <- rep(NA, length(pc))  # set lof to length of pc.
      # If species is present at the current site, set to Log P(C).
      lof[observed > 0] <- log10(pc)[observed > 0]
      # If species is absent, set to Log (1-P(C)).
      # Per the Excel file, it's really Log(1.0001-P(C)) to avoid log of 0.
      lof[is.na(lof)] <- log10(1.0001 - pc)[is.na(lof)]

      # Return the negative sum of the lof.
      # I'm using the negative because the original function in Excel was
      # set to maximize, but "nlminb" can only minimize a function.
      # The result is the same.
      -sum(lof)
    }
  }

  f <- function(x) {
    # This function returns the function which
    # will be minimized with function "nlminb".
    # "x" is the current IEC score being tried by "nlminb".
    # Note that observed is called from outside the function, but there
    # doesn't seem to be a better way to do this with "nlminb".

    # Calculate P(C) for each species.
    # In the original version, P(C) was constrained to > 0 and < or = 1.
    # P(C) is the probability or value of each species at given IEC.
    # Input values "Mean", "SD", and "H" are part of data frame "brc".
    pc <- with(brc, {dnorm(x, mean = Mean, sd = SD) * H})
    pc[pc == 0] <- .001        # Set 0 probabilities to .001.

    # criteria is set in the Declarations section of this script.
    # It is set to either pa (default) or quant.
    criteria(pc, observed)
  }

  for (site in 1:nrow(sp)) {
    # Extract the observed probabilities for the current site.
    observed <- sp[site, ]

    pts <- seq(0, 10, by = 0.2)
    resp <- -unlist(lapply(pts, function(x) f(x)))

    # this needs modification to allow labeling for both methods.
    plot(resp ~ pts, type = "l", main = paste("Site:", row.names(sp)[site]),
         xlab = "IEC", ylab = "Sum LogLik (larger values are better)",
         xlim = c(0,10), lty = 1)

    # Add vertical line at most likely value of IEC.
    abline(v = iec$IEC[site], lty = 2)

    # Add legend to plot.
    placement <- ifelse(iec$IEC[site] < 5, 'topright', 'topleft')
    legend(placement, legend = c(expression(sum(resp[i], i == 1, n)),
                                 paste("Maxima(", round(iec$IEC[site], 2), ")",
                                       sep = "")),
           lty = c(1, 2), bty = "n")
  }
}


## plot IEC vs. original environmental gradient ----

#' Plot estimated IEC vs. environmental gradient
#'
#' \code{plot_iec_cor} plots the IEC scores estimated by \code{\link{est_iec}}
#' (y-axis) against the environmental gradient (x-axis).  Generally this is used
#' with the original environmental gradient in its unscaled format (i.e., before
#' using \code{\link{scale10}}). Note that this function is only applicable when
#' applying \code{\link{est_iec}} to the same sites used to generate the taxa
#' specific Biotic Response Curves (\code{\link{est_brc}}).
#'
#' @param main an overall title for the plot (e.g., "Fish 2015 (total ag)").
#' @param xlab name of original gradient for the x axis (default is "Original
#'   Gradient")
#' @param time logical indicating if time of plotting should be printed below
#'   the main title.
#' @inheritParams plot_resp
#' @inheritParams scale10
#' @inheritParams plot_brc
#' @examples
#' data(list = c("fish_sp", "fish_grad"))
#' grad <- scale10(fish_grad, TRUE)
#' brc <- est_brc(fish_sp, grad)
#' iec <- est_iec(fish_sp, brc, n_reps = 10)
#' plot_iec_cor(iec, fish_grad, "fish")
plot_iec_cor <- function (iec, env_grad, main = NULL,
                          xlab = "Original Gradient", time = TRUE) {
  plotstats <-lm(iec$IEC ~ env_grad)
  R2 <- summary(plotstats)$r.squared
  slope <- summary(plotstats)$coefficient[2]

  # plot IEC site scores against original gradient
  lim <- c(0, 10)
  plot(env_grad, iec$IEC, xlab = xlab, ylab = "IEC", main = main,
       xlim = lim, ylim = lim, cex.sub = 0.8, cex.main = 0.97)
  if (time) mtext(Sys.time(), side = 3, cex = 0.7)
  abline(plotstats)

  # add legend
  if (slope < 0) {
    locate = 'topright'
  } else {
    locate = 'topleft'
  }

  legend(locate, lty = 0, bty = "n",
         legend = c(as.expression(bquote(R^2: ~ .(signif(R2, 2))))),
         cex = 0.9)
}