R/ithresh_methods.R
In threshr: Threshold Selection and Uncertainty for Extreme Value Analysis

Documented in plot.ithresh print.ithresh summary.ithresh

# =========================== plot.ithresh ===========================

#' Plot diagnostics an ithresh object
#'
#' \code{plot} method for class \code{"ithresh"}.  Produces an extreme value
#' threshold diagnostic plot based on an analysis performed by
#' \code{\link{ithresh}}.  Can also be used to produce a plot of
#' the posterior sample generated by \code{\link{ithresh}} for a particular
#' training threshold.
#'
#' @param x an object of class \code{"ithresh"}, a result of a call to
#'   \code{\link{ithresh}}.
#' @param y Not used.
#' @param which_v A numeric scalar or vector.
#'
#'   If \code{which_u} is not supplied (a threshold diagnostic plot is
#'   required) \code{which_v} specifies the validation thresholds, that
#'   is, the components of \code{x$v_vec}, to include in the plot.
#'
#'   If \code{which_u} is supplied (a plot of a posterior sample for a given
#'   threshold is required) then \code{which_v} is a numeric scalar
#'   that indicates which element of \code{object$v_vec} is used in selecting
#'   a single threshold (if \code{which_u = "best"}).
#'   Note: the default, \code{which_v = 1} gives the \emph{lowest} of the
#'   validation thresholds in \code{object$v_vec}.
#' @param prob A logical scalar.  If \code{TRUE} then the levels of thresholds
#'   are represented by the proportion of observations that lie below a
#'   threshold.  If \code{prob = FALSE} then the values of the thresholds are
#'   used.
#' @param top_scale A logical scalar indicating Whether or not to add a scale
#'   to the top horizontal axis.  If this is added it gives the threshold on
#'   the scale not chosen by \code{prob}.
#' @param add_legend A logical scalar indicating whether or not to add a
#'   legend to the plot.  If \code{method = "cv"} then the legend gives the
#'   levels of the validation thresholds.
#' @param legend_pos The position of the legend (if required) specified using
#'   the argument \code{x} in \code{\link[graphics]{legend}}.
#' @param which_u Either a character scalar or a numeric scalar.
#'   If \code{which_u} is supplied then \code{\link[revdbayes]{plot.evpost}}
#'   is used to produce a plot of the posterior sample generated using
#'   a particular training threshold. By default a scatter plot of the posterior
#'   sample of Generalized Pareto parameters is produced.
#'
#'   If \code{which_u = "best"} then the training threshold achieving the largest
#'   measure of predictive performance in \code{object$pred_perf}, based
#'   on the validation threshold selected using \code{which_v}, is used.
#'   See \code{\link{summary.ithresh}} to print the best thresholds for each
#'   validation threshold.
#'
#'   Otherwise, \code{which_u} is a numeric scalar that selects training
#'   threshold \code{x$u_vec[which_u]}.  Therefore, \code{which_u} must
#'   be an integer in \code{1, ..., length(x$u_vec)}.
#' @param ... Additional arguments passed on to \code{\link[graphics]{matplot}}
#'   and/or \code{\link[graphics]{legend}} and/or \code{\link[graphics]{axis}}.
#'   If \code{which_u} is supplied then these arguments are passed to
#'   \code{\link[revdbayes]{plot.evpost}}.
#' @details Produces plots of the \emph{threshold weights}, defined in
#'  equation (14) of Northrop et al. (2017) against training threshold.  A line
#'  is produced for each of the validation thresholds chosen in \code{which_v}.
#'  The result is a plot like those in the top row of Figure 7 in
#'  Northrop et al. (2017).
#'
#'   It is possible that a curve on the plot may be incomplete.  This indicates
#'   that, for a particular threshold level, a measure of predictive
#'   performance is \code{-Inf}. This occurs when an observation in the data
#'   lies above the estimated upper end point of the predictive distribution
#'   produced when this observation is removed.
#' @return If \code{which_u} is supplied then the object with which
#'   \code{\link[revdbayes]{plot.evpost}} was called is returned (invisibly).
#'   Otherwise, a list is returned (again invisibly) with two components.
#'   \code{x} is a vector containing the coordinates plotted on the
#'   (lower) horizontal axis.
#'   \code{y} is an \code{length(u_vec)} by \code{n_v} matrix of
#'   \emph{threshold weights} obtained by normalising the columns of the
#'   matrix \code{pred_perf} returned by \code{\link{ithresh}}.
#'   See equation (14) of Northrop et al. (2017).
#' @examples
#' # [Smoother plots result from making n larger than the default n = 1000.]
#'
#' # Threshold diagnostic plot
#' u_vec_gom <- quantile(gom, probs = seq(0, 0.9, by = 0.05))
#' gom_cv <- ithresh(data = gom, u_vec = u_vec_gom, n_v = 3)
#' plot(gom_cv, lwd = 2, add_legend = TRUE, legend_pos = "topleft")
#' mtext("significant wave height / m", side = 3, line = 2.5)
#'
#' # Plot of Generalized Pareto posterior sample at the best threshold
#' # (based on the lowest validation threshold)
#' plot(gom_cv, which_u = "best")
#' # See which threshold was used
#' summary(gom_cv)
#'
#' # Plot of Generalized Pareto posterior sample at the highest threshold
#' n_u <- length(u_vec_gom)
#' plot(gom_cv, which_u = n_u, points_par = list(pch = 20, col = "grey"))
#' @seealso \code{\link{ithresh}} for threshold selection in the i.i.d. case
#'   based on leave-one-out cross-validation.
#' @seealso \code{\link{summary.ithresh}} Summarizing measures of threshold
#'   predictive performance.
#' @seealso \code{\link{print.ithresh}} Prints the threshold weights.
#' @seealso \code{\link{predict.ithresh}} for predictive inference for the
#'   largest value observed in N years.
#' @export
plot.ithresh <- function(x, y, ..., which_v = NULL, prob = TRUE,
                         top_scale = TRUE, add_legend = FALSE,
                         legend_pos = "topleft", which_u = NULL) {
  if (!inherits(x, "ithresh")) {
    stop("use only with \"ithresh\" objects")
  }
  n_u <- length(x$u_vec)
  # If which_u is supplied then produce a plot of the bin-GP posterior sample
  # using threshold x$u_vec[which_u].
  if (!is.null(which_u)) {
    # Check that which_u is admissible
    if (!(which_u %in% 1:n_u) & which_u != "best") {
      stop("'which_u' must be ''best'' or in 1:length(x$u_vec)")
    }
    # Check that which_v is admissible
    n_v <- length(x$v_vec)
    if (is.null(which_v)) {
      which_v <- 1L
    } else {
      if (!is.numeric(which_v) || !(which_v %in% 1:n_v)) {
        stop("'which_v' must be in 1:length(x$v_vec)")
      }
    }
    if (which_u == "best") {
      which_u <- which.max(x$pred_perf[, which_v])
    }
    # Construct list of arguments for revdbayes::rpost_rcpp
    for_post <- c(x$for_post, list(data = x$data, thresh = x$u_vec[which_u]))
    # Set n = 1 because we're not going to use the new sample. We already have
    # the simulated values: we only need an object of class "evpost".
    for_post$n <- 1
    # Select the correct posterior simulation function
    if (x$use_rcpp) {
      gp_postsim <- revdbayes::rpost_rcpp
    } else {
      gp_postsim <- revdbayes::rpost
    }
    # I can use trans = "none" because I only want to generate an object
    # that has the correct structure
    # If we get an error then change trans and try again
    temp <- tryCatch(
      do.call(gp_postsim, for_post),
      error = function(e) {
        if (is.null(for_post$trans) || for_post$trans == "none") {
          new_for_post <- c(for_post, trans = "BC")
        } else {
          new_for_post <- c(for_post, trans = "none")
        }
        do.call(gp_postsim, new_for_post)
      }
    )
    # Get the posterior sample for threshold x$u_vec[which_u] given by ithresh
    which_rows <- (1 + (which_u - 1) * x$n):(which_u * x$n)
    temp$bin_sim_vals <- x$sim_vals[which_rows, 1, drop = FALSE]
    temp$sim_vals <- x$sim_vals[which_rows, 2:3]
    # Add names for labelling the axes
    colnames(temp$bin_sim_vals) <- "p[u]"
    colnames(temp$sim_vals) <- c("sigma[u]", "xi")
    # Produce the plot.  Create a list for sending to revdbayes::plot.evpost()
    for_plot_evpost <- list(x = temp, ...)
    # If the user has tried to use ru_scale = TRUE then remove it
    # because no simulated values have been stored on the ru scale
    if (!is.null(for_plot_evpost$ru_scale)) {
      for_plot_evpost$ru_scale <- NULL
    }
    # plot is revdbayes::plot.evpost
#    suppressWarnings(do.call(revdbayes::plot.evpost, for_plot_evpost))
    class(for_plot_evpost) <- "evpost"
    suppressWarnings(do.call(plot, for_plot_evpost))
    return(invisible(temp))
  }
  # Use only the validation thresholds in columns which_v.
  if (!is.null(which_v)) {
    x$pred_perf <- x$pred_perf[, which_v, drop = FALSE]
    x$v_ps <- x$v_ps[which_v]
    x$v_vec <- x$v_vec[which_v]
  }
  # Calculate threshold weights.  Shift to avoid underflow.
  n_v <- length(x$v_vec)
  shoof <- matrix(colMeans(x$pred_perf * !is.infinite(x$pred_perf),
                           na.rm = TRUE), ncol = n_v, nrow = n_u,
                  byrow = TRUE)
  y_data <- apply(exp(x$pred_perf - shoof), 2,
                  function(x) x / sum(x, na.rm =TRUE))
  y_lab <- "threshold weight"
  if (prob) {
    v_data <- x$v_ps
  } else {
    v_data <- x$v_vec
  }
  # General aspects.
  if (prob) {
    x_data <- x$u_ps
    t_data <- x$u_vec
    x_lab <- "quantile of training threshold / %"
  } else {
    x_data <- x$u_vec
    t_data <- x$u_ps
    x_lab <- "threshold"
  }
  xy_args <- list(x = x_data, y = y_data)
  # Look for user-supplied arguments to matplot.
  user_args <- list(...)
  m_cond <- names(user_args) %in% methods::formalArgs(graphics::matplot)
  a_cond <- names(user_args) %in% methods::formalArgs(graphics::axis)
  l_cond <- names(user_args) %in% methods::formalArgs(graphics::legend)
  legend_args <- user_args[l_cond]
  matplot_args <- user_args[(!l_cond & !a_cond) | m_cond]
  axis_args <- user_args[(!l_cond & !m_cond) | a_cond]
  axis_args$col <- 1
  if (is.null(matplot_args$xlab)) {
    matplot_args$xlab <- x_lab
  }
  if (is.null(matplot_args$ylab)) {
    matplot_args$ylab <- y_lab
  }
  if (is.null(matplot_args$type)) {
    matplot_args$type <- "l"
  }
  if (is.null(matplot_args$col)) {
    matplot_args$col <- 1
    legend_args$col <- 1
  }
  if (is.null(matplot_args$lty)) {
    matplot_args$lty <- 1:ncol(y_data)
    legend_args$lty <- 1:ncol(y_data)
  }
  if (is.null(legend_args$title)) {
    legend_args$title <- "highest threshold"
  }
  legend_args$x <- legend_pos
  all_args <- c(xy_args, matplot_args)
  do.call(graphics::matplot, c(all_args, axes = FALSE))
  axis_args$side <- 2
  do.call(graphics::axis, axis_args)
  axis_args$side <- 1
  if (prob) {
    axis_args$at <- unique(c(pretty(x_data), v_data))
  } else {
    axis_args$at <- pretty(x_data)
  }
  do.call(graphics::axis, axis_args)
  if (!is.null(axis_args$lwd)) {
    graphics::box(lwd = axis_args$lwd)
  } else {
    graphics::box()
  }
  # Add top scale?
  if (top_scale) {
    top_vals <- pretty(t_data)
    if (prob) {
      top_vals <- top_vals[top_vals < max(x$u_vec)]
      axis_args$side <- 3
      axis_args$at <- 100 * stats::ecdf(x$data)(top_vals)
      axis_args$labels <- top_vals
      do.call(graphics::axis, axis_args)
    } else {
      top_vals <- unique(c(top_vals, x$v_ps))
      axis_args$side <- 3
      axis_args$at <- stats::quantile(x$data, probs = top_vals / 100)
      axis_args$labels <- top_vals
      do.call(graphics::axis, axis_args)
    }
  }
  # Add a legend?
  if (add_legend){
    if (prob) {
      legend_args$legend <- paste(v_data, "%")
      do.call(graphics::legend, legend_args)
    } else {
      legend_args$legend <- signif(v_data, 2)
      do.call(graphics::legend, legend_args)
    }
  }
  return(invisible(xy_args))
}

# ============================== summary.ithresh ==============================

#' Summarizing measures of threshold predictive performance
#'
#' \code{summary} method for class \code{"ithresh"}
#'
#' @param object an object of class \code{"ithresh"}, a result of a call to
#'   \code{ithresh}.
#' @param ... Additional optional arguments. At present no optional
#'   arguments are used.
#' @return Returns a numeric matrix with 5 columns and \code{n_v} rows,
#'   where \code{n_v} is an argument to \code{\link{ithresh}} that
#'   determines how many of the largest training thresholds are used
#'   a validation thresholds.  The columns contain:
#' \itemize{
#'   \item column 1: the validation threshold v
#'   \item column 2: the sample quantile to which the validation threshold
#'     corresponds
#'   \item column 3: the best training threshold u judged using the
#'     validation threshold v
#'   \item column 4: the sample quantile to which the best training
#'     threshold corresponds
#'   \item column 5: the index of the vector \code{u_vec} of training
#'     thresholds to which the threshold in column2 corresponds
#' }
#' @examples
#' u_vec_gom <- quantile(gom, probs = seq(0, 0.9, by = 0.05))
#' gom_cv <- ithresh(data = gom, u_vec = u_vec_gom, n_v = 3)
#' summary(gom_cv)
#' @seealso \code{\link{ithresh}} for threshold selection in the i.i.d. case
#'   based on leave-one-out cross-validation.
#' @seealso \code{\link{plot.ithresh}} for the S3 plot method for objects of
#'   class \code{ithresh}.
#' @seealso \code{\link{print.ithresh}} Prints the threshold weights.
#' @export
summary.ithresh <- function(object, ...) {
  if (!inherits(object, "ithresh")) {
    stop("use only with \"ithresh\" objects")
  }
  # Find the best training threshold for each validation threshold
  which_best <- apply(object$pred_perf, 2, which.max)
  u_best <- object$u_vec[which_best]
  res <- cbind(object$v_vec, object$v_ps, u_best, object$u_ps[which_best],
               which_best)
  rownames(res) <- 1:length(object$v_vec)
  colnames(res) <- c("v", "v quantile", "best u", "best u quantile", "index of u_vec")
  return(res)
}

# ================================ print.ithresh =============================

#' Print method for objects of class \code{"ithresh"}
#'
#' \code{print} method for class \code{"ithresh"}.
#'
#' @param x an object inheriting from class \code{"ithresh"}, a result of a
#'   call to \code{\link{ithresh}}.
#' @param digits An integer. Used for number formatting with
#'   \code{\link[base]{format}} and \code{\link[base:Round]{signif}}.
#' @param ... Additional optional arguments. At present no optional
#'   arguments are used.
#' @details Prints a matrix of the estimated threshold weights.
#'   Each row gives the weights for each training threshold for a given
#'   validation threshold.  The row and column names are the approximate
#'   quantile levels of the thresholds.
#' @return The argument \code{x}, invisibly, as for all
#'   \code{\link[base]{print}} methods.
#' @seealso \code{\link{ithresh}} for threshold selection in the i.i.d. case
#'   based on leave-one-out cross-validation.
#' @seealso \code{\link{summary.ithresh}} Summarizing measures of threshold
#'   predictive performance.
#' @seealso \code{\link{plot.ithresh}} for the S3 plot method for objects of
#'   class \code{ithresh}.
#' @seealso \code{\link{predict.ithresh}} for predictive inference for the
#'   largest value observed in N years.
#' @export
print.ithresh <- function(x, digits = 2, ...) {
  if (!inherits(x, "ithresh")) {
    stop("use only with \"ithresh\" objects")
  }
  # Numbers of training and validation thresholds
  n_u <- length(x$u_vec)
  n_v <- length(x$v_vec)
  # Calculate threshold weights
  shoof <- matrix(colMeans(x$pred_perf * !is.infinite(x$pred_perf),
                           na.rm = TRUE), ncol = n_v, nrow = n_u,
                  byrow = TRUE)
  tw <- apply(exp(x$pred_perf - shoof), 2, function(x) x / sum(x, na.rm =TRUE))
  rownames(tw) <- x$u_ps
  colnames(tw) <- x$v_ps
  if (!inherits(x, "bcthresh")) {
    cat("Threshold weights:\n")
  }
  print.default(format(t(tw), digits = digits), print.gap = 1L,
                quote = FALSE)
  return(invisible(x))
}