#' @export
#' @import grDevices
#' @import grid
#' @importFrom utils modifyList
#' @title QQ plot matrix
#'
#' @description \code{eda_qqmat} Generates a matrix of empirical QQ plots
#'
#' @param dat Data frame.
#' @param x Continuous variable.
#' @param fac Categorical variable.
#' @param p Power transformation to apply to the continuous variable.
#' @param tukey Boolean determining if a Tukey transformation should be adopted
#' (FALSE adopts a Box-Cox transformation).
#' @param q.type An integer between 1 and 9 selecting one of the nine quantile
#' algorithms. (See \code{quantile}tile function).
#' @param upper Boolean determining if both upper and lower triangular matrix
#' should be plotted. If set to \code{FALSE}, only the lower triangular matrix
#' is plotted.
#' @param xylim X and Y axes limits.
#' @param resid Boolean determining if residuals should be plotted. Residuals
#' are computed using the \code{stat} parameter.
#' @param stat Statistic to use if residuals are to be computed. Currently
#' \code{mean} (default) or \code{median}.
#' @param plot Boolean determining if plot should be generated.
#' @param grey Grey level to apply to plot elements (0 to 1 with 1 = black).
#' @param pch Point symbol type.
#' @param p.col Color for point symbol.
#' @param p.fill Point fill color passed to \code{bg} (Only used for \code{pch}
#' ranging from 21-25).
#' @param tail.pch Tail-end point symbol type (See \code{tails}).
#' @param tail.p.col Tail-end color for point symbol (See \code{tails}).
#' @param tail.p.fill Tail-end point fill color passed to \code{bg}
#' (Only used for \code{tail.pch} ranging from 21-25).
#' @param size Point symbol size (0-1).
#' @param tic.size Size of tic labels (defaults to 0.8).
#' @param alpha Point transparency (0 = transparent, 1 = opaque). Only
#' applicable if \code{rgb()} is not used to define point colors.
#' @param med Boolean determining if median lines should be drawn.
#' @param q Boolean determining if grey box highlighting the \code{inner}
#' region should be displayed.
#' @param tails Boolean determining if points outside of the \code{inner} region
#' should be symbolized differently. Tail-end points are symbolized via the
#' \code{tail.pch}, \code{tail.p.col} and \code{tail.p.fill} arguments.
#' @param inner Fraction of mid-values to highlight in \code{q} or \code{tails}.
#' Defaults to the inner 75 percent of values.
#' @param text.size Size for category text in diagonal box.
#' @param ... Not used
#'
#' @details The function will generate an empirical QQ plot matrix. Most of the
#' arguments available in `eda_qq` are echoed in this function. The one
#' notable difference is the default settings. By default, `eda_qqmat` will
#' generate a plain vanilla set of plots. \cr
#' \cr
#' The QQ plot matrix is most effective in comparing residuals after the data
#' are fitted by the mean or median. To plot the residuals, set
#' \code{resid=TRUE}. By default, the \code{mean} is used. You can change the
#' statistic to the median by setting \code{stat=median}. \cr
#' \cr
#' The function also allows for batch transformation of values via the
#' \code{p} argument. The transformation is applied to the data prior to
#' computing the residuals.
#'
#' @returns Returns a list with the following components:
#'
#' \itemize{
#' \item \code{data}: List with input \code{x} and \code{y} values for each
#' group. May be interpolated to smallest quantile batch if batch sizes
#' don't match. Values will reflect power transformation defined in \code{p}.
#' \item \code{p}: Transformation applied to original values.}
#'
#' @references
#'
#' \itemize{
#' \item John M. Chambers, William S. Cleveland, Beat Kleiner, Paul A. Tukey.
#' Graphical Methods for Data Analysis (1983)
#' \item \href{../articles/qq.html}{Quantile-Quantile plot article}}
#'
#' @examples
#'
#' # Default output
#' singer <- lattice::singer
#' eda_qqmat(singer, height, voice.part)
#'
#' # Symbolize points outside of the "inner" region using an open point symbol
#' eda_qqmat(singer, height, voice.part, tails = TRUE)
#'
#' # Set the inner region to cover 80% and change the outer point symbol to "+"
#' eda_qqmat(singer, height, voice.part, inner = 0.8, tails = TRUE, tail.pch = 3)
#'
#' # Add the upper triangle to the matrix
#' eda_qqmat(singer, height, voice.part, upper = TRUE)
#'
#' # Plot residuals after fitting mean to each batch
#' eda_qqmat(singer, height, voice.part, resid = TRUE)
#'
#' # Log transform the data, then plot the residuals after fitting the mean model
#' eda_qqmat(iris, Petal.Length, Species, resid = TRUE, p = 0)
#'
#' # Fit the median model instead of the mean
#' eda_qqmat(iris, Petal.Length, Species, resid = TRUE, p = 0, stat = median)
#'
#' # Shade the "inner" regions (defaults to the mid 70% of values)
#' eda_qqmat(iris, Petal.Length, Species, resid = TRUE, q = TRUE, p = 0)
#'
#' # Change inner region point symbols to dark orange and change inner region
#' # range to cover 90% of mid values
#' eda_qqmat(iris, Petal.Length, Species, resid = TRUE, p = 0, inner = 0.9,
#' tail.pch = 3, p.fill = "orange2")
eda_qqmat <- function(dat, x, fac, p = 1L, tukey = FALSE, q.type = 5,
upper = FALSE, xylim = NULL, resid = FALSE, stat = mean,
plot = TRUE, grey = 0.6, pch = 21, p.col = "grey40",
p.fill = "grey60", size = 1, text.size = 1,
tail.pch = 21, tail.p.col = "grey70", tail.p.fill = NULL,
tic.size = 0.7, alpha = 0.8, q = FALSE, tails = FALSE,
med = FALSE, inner = 0.75, ...) {
if (!requireNamespace("grid", quietly = TRUE)) {
stop(
"Package \"grid\" must be installed to use this function.",
call. = FALSE
)
}
# Check for invalid arguments
input <- names(list(...))
check <- input %in% names(formals(cat))
if (any(!check)) warning(sprintf("%s is not a valid argument.",
paste(input[!check], collapse = ", ")))
# Parameters check
if (! as.character(substitute(stat)) %in% c("mean", "median"))
stop("Stat must be either the mean or the median")
if (inner < 0.25)
stop("The inner parameter must be greater than 0.25.")
# Extract data ----
# Get values and factors. Drop levels not present in the data
x <- eval(substitute(x), dat)
fac <- as.factor(eval(substitute(fac), dat))
if(is.factor(fac)) fac <- droplevels(fac)
# Remove missing values from the data
which_na <- which(is.na(x))
if(length(which_na > 0)){
x <- x[-which_na]
fac <- fac[-which_na]
if(is.factor(fac)) fac <- droplevels(fac)
warning(cat(length(which_na),"rows were removed due to NAs being present.\n"))
}
# Re-express data if required
if (p != 1) {
x <- eda_re(x, p = p, tukey = tukey)
}
x.isna <- is.na(x)
rm.nan <- ifelse( any(x.isna), 1 , 0)
# Re-expression may produce NaN values. Output warning if TRUE
if( rm.nan > 0 ) {
warning(paste("\nRe-expression produced NaN values. These observations will",
"be removed from output. This will result in fewer points",
"in the ouptut."))
bad <- x.isna
x <- x[!bad]
}
# Set plot elements color
plotcol <- rgb(1-grey, 1-grey, 1-grey)
# Set point color parameters.
if(!is.null(alpha)){
if(p.col %in% colors() & p.fill %in% colors() ){
p.col <- adjustcolor( p.col, alpha.f = alpha)
p.fill <- adjustcolor( p.fill, alpha.f = alpha)
}
}
# Get upper/lower bounds of quantile box
b.val = c(.5 - inner / 2 , .5 + inner / 2)
# Loop through each pairs of factors
lst <- split(x, fac)
if(resid == TRUE){
lst <- lapply(lst, FUN = function(x){x - stat(x)})
}
fac_un <- unique(fac)
fac_num <- length(fac_un)
# Set plot parameters
lmin <- min(dev.size("cm")) # Get smallest plot window dimension
dim.cm <- lmin / fac_num * 1.3 # plot width and height
num.plots <- (fac_num^2) # Number of plots
# Get axes limits for plots
lim.buffer = 0.05
if(is.null(xylim)){
xylim <- range(unlist(lst))
xylim <- c(xylim[1] - diff(xylim) * lim.buffer , xylim[2] + diff(xylim) * lim.buffer)
}
# Create empty list (to be used for data output)
lstout <- list()
# Reset plot window
grid.newpage()
# Compute margins needed to accommodate labels
#label <- as.character(signif(max(xylim),2))
label <- as.character(format(signif(max(xylim),2), scientific = FALSE))
label_width <- convertWidth(stringWidth(label), "lines", valueOnly = TRUE)
label_height <- convertWidth(stringHeight(label), "lines", valueOnly = TRUE)
x_margin <- unit(label_width + 2, "lines")
y_margin <- unit(label_height + 5, "lines") # Horizontal margin for x-axis text
# Define main viewport
main <- viewport(width = unit(1, "npc") - x_margin,
height = unit(1, "npc") - y_margin,
layout=grid.layout(fac_num, fac_num, respect = TRUE))
pushViewport(main)
jj <- 0
for(j in levels(fac_un)){
jj <- jj + 1
ii <- 0
for(i in levels(fac_un)){
ii <- ii + 1
x <- unlist(lst[i])
y <- unlist(lst[j])
# Check if only lower triangular matrix.
if(upper == TRUE | ii <= jj){
# Center on mean or median if requested
# if (resid == TRUE){
# x <- x - stat(x)
# y <- y - stat(y)
# }
# Generate qqplot using base function
qq <- qqplot(x,y, plot.it = FALSE, qtype = q.type)
x <- qq$x
y <- qq$y
# Save values for output
dfout <- data.frame(x,y)
row.names(dfout) <- NULL
lstout[[paste(i,j)]] <- dfout
# Set point color parameters.
if(!is.null(alpha)){
if(p.col %in% colors() & p.fill %in% colors() ){
p.col <- adjustcolor( p.col, alpha.f = alpha)
p.fill <- adjustcolor( p.fill, alpha.f = alpha)
}
}
# Get quantile parameters
qx <- quantile(x, b.val, qtype = q.type)
qy <- quantile(y, b.val, qtype = q.type)
if(tails == TRUE){
# If tails are to be plotted separately, identify points outside of the
# inner region
if (!is.na(table(x < qx[1])["TRUE"]) & !is.na(table(y < qy[1])["TRUE"])){
lower.tail <- min(table(x < qx[1])["TRUE"], table(y < qy[1])["TRUE"])
} else{
lower.tail <- 0
}
if (!is.na(table(x > qx[2])["TRUE"]) & !is.na(table(y > qy[2])["TRUE"])){
upper.tail <- max(table(x > qx[2])["TRUE"], table(y > qy[2])["TRUE"])
} else {
upper.tail <- 0
}
inner.tails <- (lower.tail+1):(length(x) - upper.tail)
outer.tails <- -inner.tails
}
# Compute median values
medx <- median(x)
medy <- median(y)
# Generate plot ----
if(plot == TRUE){
vp <- viewport(layout.pos.col = ii, layout.pos.row = jj, xscale = xylim, yscale = xylim)
pushViewport(vp)
grid.rect(gp = gpar(col = plotcol))
# Define point size based in plot window size
vp_width <- convertX(unit(1, "npc"), "inches", valueOnly = TRUE)
vp_height <- convertY(unit(1, "npc"), "inches", valueOnly = TRUE)
point_size <- min(vp_width, vp_height) * 10 * size
# Add points
if( ii != jj){
if(tails != TRUE){
grid.points(x=unit(x,"native"), y=unit(y,"native"), size = unit(point_size, "points"),
gp = gpar(fill = p.fill, col = p.col), pch = pch)
} else {
grid.points(x=unit(x[inner.tails],"native"),
y=unit(y[inner.tails],"native"),
size = unit(point_size, "points"),
gp = gpar(fill = p.fill, col = p.col), pch = pch)
if (length(x[outer.tails]) !=0){ # Nothing to plot if tail index is empty
grid.points(x=unit(x[outer.tails],"native"),
y=unit(y[outer.tails],"native"),
size = unit(point_size, "points"),
gp = gpar(fill = tail.p.fill, col = tail.p.col), pch = tail.pch)
}
}
# Add diagonal (1:1) line
grid.lines(gp = gpar(cex = 0.8, col = plotcol))
} else {
# Print labels on diagonal
# Compute factor label size based on label text width
vp_side <-convertWidth(unit(1, "npc"), "mm", valueOnly = TRUE)
max_width <- vp_side * 0.5 # We use 70% of the available width to fit the text
font_size <- max_width / max(nchar(as.character(fac_un))) # Estimate font size by dividing by text length
if(font_size > 2) font_size <- 2
grid.text(i, gp = gpar(col = "grey40", cex = font_size * text.size))
}
# Add y-axis
if( (ii == 1) & (jj %% 2 !=0)) {
grid.yaxis(gp = gpar(cex = tic.size, col = plotcol))
} else if( (ii == fac_num) & (jj %% 2 ==0)) {
grid.yaxis(gp = gpar(cex = tic.size, col = plotcol), main = FALSE)
}
# Add x-axis
if(jj == 1 & (ii %% 2 == 0)) {
grid.xaxis(main = FALSE, gp = gpar(cex = tic.size, col = plotcol))
} else if (jj == fac_num & (ii %% 2 != 0)){
grid.xaxis( gp = gpar(cex = tic.size, col = plotcol))
}
# grid.text(paste(i,j)) # Used to debug plot placement
# Add medians
if(med == TRUE& ii != jj){
grid.segments(x0 = medx, x1 = medx, y0 = xylim[1], y1 = xylim[2],
default.units = "native", gp = gpar(col = "grey80", lty = 2))
grid.segments(y0 = medy, y1 = medy, x0 = xylim[1], x1 = xylim[2],
default.units = "native", gp = gpar(col = "grey80", lty = 2))
}
# Add core boxes ----
if(q == TRUE & ii != jj){
grid.polygon(x = c(qx[1], qx[2], qx[2], qx[1]),
y = c(xylim[1], xylim[1], xylim[2], xylim[2]),
gp = gpar(fill = rgb(0,0,0,0.05), col = NA),
default.units = "native")
grid.polygon(y = c(qy[1], qy[2], qy[2], qy[1]),
x = c(xylim[1], xylim[1], xylim[2], xylim[2]),
gp = gpar(fill = rgb(0,0,0,0.05), col = NA),
default.units = "native")
}
upViewport()
} # Close plot loop
}
} # Close ii loop
} # Close jj loop
# Remind user if power parameter was set to value other than 1
if ( p !=1 )
message(paste0("Note that a power transformation of ",p," was applied to the data",
" before they were processed for the plot."))
popViewport(0)
invisible(list(qq_values = lstout, p = p))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.