R/plotCPO.R

Defines functions plotCPO

Documented in plotCPO

#' @title plot conditional predictive ordinate
#' @description
#' Plot the conditional predictive ordinate (CPO) for each individual of a 
#' fitted model generated by \code{BayesSUR} which is a \code{BayesSUR} object. 
#' CPO is a handy posterior predictive check because it may be used to identify 
#' outliers, influential observations, and for hypothesis testing across 
#' different non-nested models (Gelfand 1996).
#' @importFrom graphics axis box text par abline
#' @name plotCPO
#' 
#' @param x an object of class \code{BayesSUR}
#' @param outlier.thresh threshold for the CPOs. The default is 0.01.
#' @param outlier.mark mark the outliers with the response names. 
#' The default is \code{FALSE}
#' @param scale.CPO scaled CPOs which is divided by their maximum. 
#' The default is \code{TRUE}
#' @param x.loc a vector of features distance
#' @param axis.label a vector of predictor names which are shown in CPO plot. 
#' The default is \code{NULL} only showing the indices. The value \code{"auto"} 
#' show the predictor names from the original data.
#' @param mark.pos location of the marked text relative to the point
#' @param las graphical parameter of plot.default
#' @param cex.axis graphical parameter of plot.default
#' @param mark.color color of the marked text. The default color is red
#' @param mark.cex font size of the marked text. The default font size is 0.8
#' @param xlab a title for the x axis
#' @param ylab a title for the y axis
#' @param ... other arguments
#' 
#' @details The default threshold for the CPOs to detect the outliers is 0.01 
#' by Congdon (2005). It can be tuned by the argument \code{outlier.thresh}.
#'
#' @references Statisticat, LLC (2013). \emph{Bayesian Inference.} Farmington, CT: Statisticat, LLC.
#' @references Gelfand A. (1996). \emph{Model Determination Using Sampling Based Methods}. In Gilks W., Richardson S., Spiegelhalter D. (eds.), Markov Chain Monte Carlo in Practice, pp. 145–161. Chapman & Hall, Boca Raton, FL.
#' @references Congdon P. (2005). \emph{Bayesian Models for Categorical Data}. John Wiley & Sons, West Sussex, England.
#'
#' @examples
#' data("exampleEQTL", package = "BayesSUR")
#' hyperpar <- list(a_w = 2, b_w = 5)
#'
#' set.seed(9173)
#' fit <- BayesSUR(
#'   Y = exampleEQTL[["blockList"]][[1]],
#'   X = exampleEQTL[["blockList"]][[2]],
#'   data = exampleEQTL[["data"]], outFilePath = tempdir(),
#'   nIter = 10, burnin = 0, nChains = 1, gammaPrior = "hotspot",
#'   hyperpar = hyperpar, tmpFolder = "tmp/", output_CPO = TRUE
#' )
#'
#' ## check output
#' # plot the conditional predictive ordinate (CPO)
#' plotCPO(fit)
#'
#' @export
plotCPO <- function(x, outlier.mark = TRUE, outlier.thresh = 0.01, 
                    scale.CPO = TRUE, x.loc = FALSE, axis.label = NULL, 
                    las = 0, cex.axis = 1, 
                    mark.pos = c(0, -.01), mark.color = 2, mark.cex = 0.8,
                    xlab = "Observations", ylab = NULL, ...) {
  x$output[-1] <- paste(x$output$outFilePath, x$output[-1], sep = "")
  if (is.null(x$output$CPO)) {
    stop("Please specify argument output_CPO in BayesSUR()!")
  }

  CPO <- as.matrix(read.table(x$output$CPO))

  rownames(CPO) <- rownames(as.matrix(read.table(x$output$Y, header = TRUE)))
  colnames(CPO) <- colnames(as.matrix(read.table(x$output$Y, header = TRUE)))
  CPO_idx <- seq_len(nrow(CPO))

  if (is.null(ylab)) {
    ylab <- ifelse(scale.CPO, "scaled CPOs", "CPOs")
  }

  name.observations <- rownames(CPO)

  if (is.null(axis.label)) {
    x.loc <- CPO_idx
    names(x.loc) <- CPO_idx
  } else {
    if (axis.label[1] == "auto") {
      x.loc <- CPO_idx
      names(x.loc) <- name.observations
    } else {
      if (!x.loc[1]) {
        x.loc <- seq_along(axis.label)
      } else {
        if (length(axis.label) != length(x.loc)) {
          stop("The given predictor names are not consistent with the data")
        }
      }
      names(x.loc) <- axis.label
    }
  }

  opar <- par(no.readonly = TRUE)
  on.exit(par(opar))
  par(xpd = FALSE)
  if (ncol(CPO) > 1) {
    if (scale.CPO) CPO <- CPO / max(CPO)
    plot.default(as.vector(CPO) ~ rep(CPO_idx, times = ncol(CPO)), 
                 xlim = c(1, nrow(CPO)), ylim = c(0, max(CPO)), 
                 xaxt = "n", bty = "n", ylab = ylab, xlab = xlab, 
                 main = "Conditional predictive ordinate", pch = 19)
    axis(1, at = x.loc, labels = names(x.loc), las = las, cex.axis = cex.axis)
    box()

    # mark the names of the specified response variables corresponding to the given responses
    if (outlier.mark) {
      if (min(CPO) > outlier.thresh) {
        message("NOTE: The minimum CPO is larger than the threshold of the (scaled) CPO!\n")
      } else {
        name.responses <- colnames(CPO)
        text(rep(CPO_idx, times = ncol(CPO))[
          which(as.vector(CPO) <= outlier.thresh)] + mark.pos[1], 
          as.vector(CPO[CPO < outlier.thresh]) + mark.pos[2], 
          labels = rep(name.responses, 
                       each = nrow(CPO))[as.vector(CPO) < outlier.thresh], 
          col = mark.color, cex = mark.cex)
        abline(h = outlier.thresh, lty = 2, col = mark.color)
      }
    }
  } else {
    if (scale.CPO) CPO <- CPO / max(CPO)
    plot.default(CPO ~ seq_along(CPO), xaxt = "n", bty = "n", 
                 xlim = c(1, length(CPO)), 
                 ylim = c(min(CPO) + mark.pos[2] * 2, max(CPO)), 
                 ylab = ylab, xlab = xlab, 
                 main = "Conditional predictive ordinate", pch = 19)
    axis(1, at = x.loc, labels = names(x.loc), las = las, cex.axis = cex.axis)
    box()

    # mark the names of the specified response variables corresponding to the given responses
    if (outlier.mark) {
      if (min(CPO) > outlier.thresh) {
        message("NOTE: The minimum CPO is larger than the threshold of the (scaled) CPO!\n")
      } else {
        opar <- par(no.readonly = TRUE)
        on.exit(par(opar))
        par(new = TRUE)
        plot.default(CPO[CPO < outlier.thresh] ~ which(CPO < outlier.thresh), 
                     xaxt = "n", bty = "n", xlim = c(1, length(CPO)), 
                     ylim = c(min(CPO) + mark.pos[2] * 2, max(CPO)), 
                     ylab = "", xlab = "", main = "", 
                     pch = 19, col = mark.color)
        text(which(CPO <= outlier.thresh) + mark.pos[1], 
             CPO[CPO < outlier.thresh] + mark.pos[2], 
             labels = name.observations[CPO < outlier.thresh], 
             col = mark.color, cex = mark.cex)
        abline(h = outlier.thresh, lty = 2, col = mark.color)
      }
    }
  }
}

Try the BayesSUR package in your browser

Any scripts or data that you put into this service are public.

BayesSUR documentation built on Aug. 28, 2023, 5:09 p.m.