#' Function to produce correlation panel plots
#'
#' Fairly versatile function to produce paneled correlation plots, where the lower
#' triangle of panels shows the x,y relationship, the diagonal shows a histogram of each
#' column of data, and the upper triangle summarizes the correlation.
#'
#' @param dataframe The object you want to pull data from to plot.
#' @param ids Optional vector of column IDs or names you want to plot from dataframe.
#' @param hist.breaks The number of breaks desired in the diagonal histograms. Default is
#' 10.
#' @param hist.col The color of the histograms. Default is black.
#' @param hist.names The titles to be printed above histograms, if desired. Will be left
#' blank if not provided.
#' @param hist.names.cex The size of the hist.names titles. Defaults to 1.
#' @param hist.names.spacing The space of the title below the top of the histogram plot.
#' Default is -2. Increasing this number will move the title closer to the top of the
#' panel.
#' @param hist.y The y-axis label in the diagonal histograms. Defaults to blank, but can
#' be set to, e.g., "Frequency".
#' @param summary.cex The size of the summary stat text in the upper half of the plots.
#' Default is 2.
#' @param neg.color The background color of the panel in the upper half of the plots for
#' correlations with negative coefficients. It is very important that this be formatted as
#' a vector of length four, minimum 0, maximum 1, corresponding to rgb + alpha channel.
#' All other formats will fail. Default is red, with alpha 0.5.
#' @param mid.color The background color of the panel in the upper half of the plots for
#' non-existent correlations. Format the same as neg.color. Default is white.
#' @param pos.color The background color of the panel in the upper half of the plots for
#' correlations with positive coefficients. Format the same as neg.color. Default is blue,
#' with alpha 0.5.
#' @param pt.col The color of the points in scatterplots. Default is black.
#' @param par.oma Used to format the space around the entire plot (the area outside all
#' panels). The format is c(bottom, left, top, right). Default is c(4, 4, 2, 2).
#' @param par.tcl Used to control the length of the tick marks inside the panels. Default
#' is -0.25.
#' @param par.mgp Used to control the distance of the labels from the tick marks inside
#' the panels. Default is c(2, 0.2, 0).
#' @param par.lab Used to control the number of tick marks inside the panels. Format is
#' c(x, y, len). Although len is not implemented in R, it is necessary to pass along.
#' Default is c(5, 5, 7).
#' @param left.text The labels to be printed along each panel on the left margin of the
#' figure. A common thing people will want to do is to reverse the columns
#' plotted along the bottom axis and drop the last one. Example of how to do this, if,
#' for example, you are plotting all the columns of your data frame except the first:
#' c(rev(names(yourDF[,2:dim(yourDF)[2]])),""). You actually don't need to bind the blank
#' in in this example, but it's shown here for completeness.
#' @param bottom.text Like left.text, but for the bottom axis labels.
#' @param left.text.cex The size of the left.text labels. Defaults to 1.
#' @param bottom.text.cex The size of the bottom.text labels. Defaults to 1.
#' @param expansion Controls how much the histogram is shrunk down to facilitate plotting
#' of histogram names. Also controls how zoomed out the scatterplots are, which might be
#' useful for ensuring dots do not interfere with axis labels.
#' @param ... Other arguments that can be passed to plot, including cex for the
#' dots in the scatterplot and for the axis labels.
#'
#' @details There are a large variety of options that can be customized on this function,
#' but fundamentally it places scatterplots in the lower triangle of the plot, histograms
#' along the diagram, and a summary of the correlation in the upper triangle.
#'
#' @return A paneled correlation plot.
#'
#' @export
#'
#' @importFrom grDevices colorRampPalette rgb
#' @importFrom graphics hist mtext par plot rect text title axis
#' @importFrom stats cor.test
#'
#' @examples
#' exData <- data.frame(
#' col1=jitter(sort(rnorm(n=200, mean=0, sd=1), decreasing=TRUE), amount=1),
#' col2=jitter(sort(rnorm(n=200, mean=10, sd=1)), amount=4),
#' col3=jitter(sort(rnorm(n=200, mean=-3, sd=1)), amount=3))
#'
#' corrplotter(dataframe=exData, hist.breaks=20, hist.col="purple",
#' hist.names=c("one","two","three"), left.text=c("units1", "units2", ""),
#' bottom.text=c("other_units1", "other_units2", "other_units3"),
#' summary.cex=2, neg.color=c(1,0,0,1), mid.color=c(1,1,0,1), pos.color=c(0,1,0,1),
#' expansion=0.5, cex=0.5, pt.col="gray")
corrplotter <- function(dataframe, ids, hist.breaks, hist.col, hist.names,
hist.names.cex, hist.names.spacing, hist.y, summary.cex, neg.color, mid.color,
pos.color, pt.col, par.oma, par.tcl, par.mgp, par.lab, left.text, bottom.text,
left.text.cex, bottom.text.cex, expansion, ...)
{
if(missing(ids))
{
dataframe <- dataframe
}
else
{
dataframe <- dataframe[,ids]
}
#fill in all the missing graphical parameters with the defaults
if(missing(par.oma))
{
par.oma <- c(4, 4, 2, 2)
}
else
{
par.oma <- par.oma
}
if(missing(par.tcl))
{
par.tcl <- -0.25
}
else
{
par.tcl <- par.tcl
}
if(missing(par.mgp))
{
par.mgp <- c(2, 0.2, 0)
}
else
{
par.mgp <- par.mgp
}
if(missing(par.lab))
{
par.lab <- c(5, 5, 7)
}
else
{
par.lab <- par.lab
}
if(missing(hist.breaks))
{
hist.breaks <- 10
}
else
{
hist.breaks <- hist.breaks
}
if(missing(hist.col))
{
hist.col <- "black"
}
else
{
hist.col <- hist.col
}
if(missing(pt.col))
{
pt.col <- "black"
}
else
{
pt.col <- pt.col
}
if(missing(hist.names))
{
hist.names <- rep("", dim(dataframe)[2])
}
else
{
hist.names <- hist.names
}
if(missing(hist.names.cex))
{
hist.names.cex <- 1
}
else
{
hist.names.cex <- hist.names.cex
}
if(missing(hist.names.spacing))
{
hist.names.spacing <- -2
}
else
{
hist.names.spacing <- hist.names.spacing
}
if(missing(hist.y))
{
hist.y <- ""
}
else
{
hist.y <- hist.y
}
if(missing(left.text))
{
left.text <- rep("", dim(dataframe)[2])
}
else
{
left.text <- left.text
}
if(missing(bottom.text))
{
bottom.text <- rep("", dim(dataframe)[2])
}
else
{
bottom.text <- bottom.text
}
if(missing(left.text.cex))
{
left.text.cex <- 1
}
else
{
left.text.cex <- left.text.cex
}
if(missing(bottom.text.cex))
{
bottom.text.cex <- 1
}
else
{
bottom.text.cex <- bottom.text.cex
}
if(missing(neg.color))
{
neg.color <- c(1,0,0,0.5)
}
else
{
neg.color <- neg.color
}
if(missing(mid.color))
{
mid.color <- c(1,1,1,1)
}
else
{
mid.color <- mid.color
}
if(missing(pos.color))
{
pos.color <- c(0,0,1,0.5)
}
else
{
pos.color <- pos.color
}
if(missing(summary.cex))
{
summary.cex <- 2
}
else
{
summary.cex <- summary.cex
}
if(missing(expansion))
{
expansion <- 0
}
else
{
expansion <- expansion
}
#check here to ensure you have equal number of names, columns and things you
#want to plot
if( length(hist.names) != length(left.text) &
length(hist.names) != length(bottom.text) &
length(hist.names) != dim(dataframe)[2])
{
stop("Your text names and desired panel dimensions are not compatible")
}
#set up the gridded plot to have as many panels as the square of the number of columns
par(mfrow = c(dim(dataframe)[2], dim(dataframe)[2]), mar=c(0,0,0,0),
oma=par.oma, tcl=par.tcl, mgp=par.mgp, lab=par.lab)
#outrageously complicated step to deal with pasting a vector into rgb
neg.color <- as.matrix(neg.color)
mid.color <- as.matrix(mid.color)
pos.color <- as.matrix(pos.color)
#create a vector 201 long from -1 to 1 for the possible correlation coefficients
corColors <- seq(from=-1, to=1, length=201)
#assign names to the corColors. these names are actually a color ramp from neg.color
#(negative corrs) to mid.color to pos.color
names(corColors) <- colorRampPalette(c(
rgb(neg.color[1,], neg.color[2,], neg.color[3,], neg.color[4,]),
rgb(mid.color[1,], mid.color[2,], mid.color[3,], mid.color[4,]),
rgb(pos.color[1,], pos.color[2,], pos.color[3,], pos.color[4,])),
alpha = TRUE)(201)
#loop through each column
for(i in 1:dim(dataframe)[2])
{
#plot against each other column
for(j in 1:dim(dataframe)[2])
{
#plot the upper triangle
if(j > i)
{
#create a temporary correlation of column i as a function of j
temp <- cor.test(x=dataframe[,j], y=dataframe[,i])
#find the interval the corr coefficient p-value belongs to
rectColor <- names(corColors[findInterval(temp$estimate, corColors)])
#plot a fake plot that does have borders but no axes or labels
plot(1, type="n", xaxt="n", yaxt="n", xlab="", ylab="")
rect(xleft=0, xright=2, ybottom=0, ytop=2, col=rectColor)
#add the correlation coefficient, rounded to 2 digits, and the
#significance on the next line. note the "\n" to start a newline
text(x=1, y=1, paste(expression(" r ="), round(temp$estimate, 2), "\n",
paste(expression("p ="), round(temp$p.value, 2))), cex=summary.cex)
}
#plot the diagonals
if(j == i)
{
#create a temporary histogram so you can determine what the maximum is
#and expand y-axis accordingly
tempHist <- hist(dataframe[,j], breaks=hist.breaks, plot=FALSE)
#calculate the range between the default xlim
xRange <- diff(range(tempHist$breaks))
hist(dataframe[,j], main="", breaks=hist.breaks, col=hist.col,
ylim=c(0, max(tempHist$counts) * (1 + expansion)),
xlim=c( min(tempHist$breaks) - (xRange * expansion),
max(tempHist$breaks) + (xRange * expansion)), ...)
#add the y-axis labels and titles
axis(side=2, labels=hist.y, tick=FALSE,
at=max(tempHist$counts) * (1 + expansion)/2, line=-1.5)
title(hist.names[i], line=hist.names.spacing, cex.main=hist.names.cex)
}
#plot the lower triangle
if(j < i)
{
#first calculate the range between the min and max for x and for y
xRange <- diff(range(dataframe[,j]))
yRange <- diff(range(dataframe[,i]))
plot(dataframe[,i]~dataframe[,j],
xlim=c( min(dataframe[,j]) - (xRange * expansion),
max(dataframe[,j]) + (xRange * expansion)),
ylim=c( min(dataframe[,i]) - (yRange * expansion),
max(dataframe[,i]) + (yRange * expansion)), col=pt.col, ...)
}
}
}
#add unit labels to left and bottom axes. the outer margin of the figure isn't changed
#by messing with par mar above, and runs from zero to 1. determine where labels should
#go by making a sequence from 0 to 1 of length equal to twice the number of panels
#plus 1, then only selecting the even numbered (excluding zero)
places <- seq(from=0, to=1, length=2*dim(dataframe)[2]+1)
places <- places[1:dim(dataframe)[2]*2]
#add the left side labels
mtext(text=left.text, at=places, line=2, side=2, outer=TRUE, cex=left.text.cex)
#add the bottom side labels if provided
mtext(text=bottom.text, at=places, line=2, side=1, outer=TRUE, cex=bottom.text.cex)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.