R/PlotPValueHist.R
In MetabolomicsBasics: Basic Functions to Investigate Metabolomics Data Matrices

Documented in PlotPValueHist

#' @title PlotPValueHist.
#' @description \code{PlotPValueHist} will take a named matrix of P-values
#'   (i.e. numeric between 0..1) and plot histograms for each column. In
#'   the easiest case this matrix is generated by \link{MetaboliteANOVA}.
#' @details See examples.
#' @param out matrix/data.frame; P-value table from 'MetaboliteANOVA.R' with factors in named columns and trait P-values in rows.
#' @param method Multiple testing correction method applied, piped to \code{p.adjust()}.
#' @param xl xlab.
#' @param yl ylab.
#' @param frac.col Render histogram bars in stacked colors according to provided color vector (should be a vector of valid color names of length=nrow(out)).
#' @param ... Passed on to \code{par}. Useful to adjust \code{cex}.
#' @return NULL. Will generate a P-value histogram plot.
#' @examples
#' # load raw data and sample description
#' raw <- MetabolomicsBasics::raw
#' sam <- MetabolomicsBasics::sam
#'
#' # compute P-values according to specified ANOVA model (simple and complex)
#' head(pvals <- MetaboliteANOVA(dat = raw, sam = sam, model = "GT+Batch+Order"))
#' PlotPValueHist(out = pvals)
#'
#' # adjust multiple testing correction method and y lable
#' PlotPValueHist(out = pvals, method = "none", yl = "Number of Genes")
#'
#' # color bars (by chance or according to a metabolite group)
#' PlotPValueHist(out = pvals, frac.col = rep(2:3, length.out = nrow(pvals)))
#' met <- MetabolomicsBasics::met
#' met$Name[grep("ine$", met$Name)]
#' PlotPValueHist(out = pvals, frac.col = 2 + 1:nrow(pvals) %in% grep("ine$", met$Name))
#' @export
#' @importFrom graphics layout axTicks rect
#' @importFrom stats p.adjust
PlotPValueHist <- function(out = NULL, method = "BH", xl = "ANOVA P-values", yl = "Number of metabolites", frac.col = NULL, ...) {
  # internal function
  FillExpressionVector <- function(x) {
    ev <- vector("expression", length(x))
    for (i in 1:length(ev)) {
      ev[i] <- substitute(expression(10^e), list(e = x[i]))[2]
    }
    return(ev)
  }
  stopifnot(all(abs(out - 0.5) <= 0.5, na.rm = T))

  # prepare layout
  opar <- par(no.readonly = TRUE)
  on.exit(par(opar))
  tmp.plots <- 2 * ncol(out)
  tmp.layout <- rbind(cbind(tmp.plots + 1, matrix(1:(tmp.plots), nrow = 2, byrow = T)), tmp.plots + 2)
  tmp.layout[nrow(tmp.layout), 1] <- 0
  layout(mat = tmp.layout, widths = c(0.025, rep(0.975 / ncol(out), ncol(out))), heights = c(rep(0.475, 2), 0.05))

  par(...)

  # plot multiple-testing corrected p-value histograms
  if (is.null(attr(out, "p.adjust.method")) | attr(out, "p.adjust.method")=="none") {
    out.adj <- apply(out, 2, p.adjust, method)
  } else {
    out.adj <- out
    method <- attr(out, "p.adjust.method")
  }
  tmp.seq <- seq(0, 1, .05)
  tmp.max <- max(apply(out.adj, 2, function(y) {
    hist(y, breaks = tmp.seq, plot = F)$counts
  }))
  if (is.null(frac.col)) {
    for (i in 1:ncol(out)) {
      par(mar = c(1.25, 3.5, 1.75, 0) + .25)
      hist(out.adj[, i], breaks = tmp.seq, ylim = c(0, tmp.max), main = colnames(out)[i], xlab = "", ylab = "", las = 1)
    }
  } else {
    for (i in 1:ncol(out)) {
      tmp.mat <- cbind(0, sapply(split(out.adj[, i], factor(frac.col)), function(y) {
        hist(y, breaks = tmp.seq, plot = F)$counts
      }))
      par(mar = c(1.25, 3.5, 1.75, 0) + .25)
      graphics::plot(x = c(0, 1), y = c(0, tmp.max), main = colnames(out)[i], xlab = "", ylab = "", las = 1, type = "n")
      for (j in 2:ncol(tmp.mat)) rect(tmp.seq[-length(tmp.seq)], apply(tmp.mat[, 1:(j - 1), drop = F], 1, sum), tmp.seq[-1], apply(tmp.mat[, 1:j], 1, sum), col = colnames(tmp.mat)[j])
    }
  }

  # plot log-transformed p-value histograms
  out.adj <- log10(out.adj)
  if (any(is.infinite(out.adj))) out.adj[is.infinite(out.adj)] <- min(out.adj[is.finite(out.adj)])
  tmp.seq <- seq(ifelse(floor(min(out.adj, na.rm = T)) <= -1, floor(min(out.adj, na.rm = T)), -1), 0)
  tmp.max <- max(apply(out.adj, 2, function(y) {
    hist(y, breaks = tmp.seq, plot = F)$counts
  }))
  tmp.min <- 0.9
  for (i in 1:ncol(out)) {
    par(mar = c(3, 3.5, 0.25, 0) + .25)
    a <- hist(out.adj[, i], breaks = tmp.seq, plot = F)$counts
    filt <- a != 0
    graphics::plot(x = range(tmp.seq), y = c(tmp.min, tmp.max), log = "y", type = "n", axes = F, ann = F)
    rect(
      xleft = tmp.seq[-length(tmp.seq)][filt],
      ybottom = rep(tmp.min, length(tmp.seq) - 1)[filt],
      xright = tmp.seq[-1][filt],
      ytop = a[filt]
    )
    graphics::axis(1, at = axTicks(1)[seq(1, length(axTicks(1)))], tick = F, labels = FillExpressionVector(axTicks(1)[seq(1, length(axTicks(1)))]))
    graphics::axis(1, at = axTicks(1), labels = FALSE)
    graphics::axis(2, las = 2)
  }

  # Labels...
  par(mar = rep(0, 4) + .1)
  graphics::plot(0, 0, ann = F, axes = F, type = "n")
  graphics::text(x = 0, y = 0, labels = paste(yl, " (n=", sum(is.finite(out.adj[, i])), ")", sep = ""), srt = 90, cex = 4 / 3)
  graphics::plot(0, 0, ann = F, axes = F, type = "n")
  graphics::text(x = 0, y = 0, labels = paste(xl, " (", method, ")", sep = ""), cex = 4 / 3)

  invisible(NULL)
}