##' Create a Percentile plot
##' @description A percentile plot is a means of simultaneously displaying the
##' summary statistics of multiple numeric variables whilst highlighting the
##' position of a particular element within the distribution of each variable.
##' @details Values are scaled so that the mean is centrered in the plotting
##' window. This is a similar methodology as used by Public Health England in
##' the area profiles section of 'Finger tips' tools
##' (http://www.phoutcomes.info/).
##'
##' 'Good performance' is typified by being to
##' the right of the central line so where good performance is associated with
##' a low value, e.g. smoking prevalence, it is important to ensure this is
##' reflected in the lowIsGood parameter.
##'
##' Within the function, margin settings change automatically to fit
##' longest individual label on single line. Shorter labels are preferential to
##' avoid the plotting window becoming too condensed. Within an Rmd file, labels
##' can be followed by a sequential numbering system with a key below to allow a
##' fuller description.
##' @param data data frame or matrix of numeric and/or integer variables
##' @param pick element or row of data to highlight with dot overlaid on bars.
##' Must be an integer between 1 and nrow(data). Default is 1.
##' @param additionalProbs pair of values at which to draw additional vertical
##' lines on each bar. Must be a numeric vector of length two where values are
##' greater than zero and less than one. Default is c(0.05, 0.95).
##' @param lowIsGood boolean vector related to variable plotting order
##' indicating whether good performance is associated with a low value. If TRUE,
##' the scale is inverted so that low values appear on the right and high values
##' on the left of the plot. Default is FALSE
##' @param localValueCex size of the dot. Default is 20 divided by number of
##' variables in data which works as a rough rule of thumb.
##' @param localValueLabel The generic label to apply to each row of data. Appears
##' at bottom of plot. Default is "Local value"
##' @param localValueLabelXpos determines where on the plot to place the local
##' value label. Used in mtext. Default is -0.23
##' @param plotLabels character vector of labels to insert to the left of the
##' plot. Default is colnames of data.
##' @param cexLabels size of the labels on left-hand side of plot. Default is 1.
##' @param plotTitle the title given to the plot. This is placed towards the
##' left-hand corner of the window.
##' @param plotTitleXpos determines where on the plot to place the plot title.
##' Used in mtext. Default is -0.4
##' @param plotTitleYpos the value of the horizontal line in top margin to
##' position plot title. Used in mtext. Default is 2
##' @param plotTitleCex font size of plot title. Used in mtext. Default is 1.5
##' @param horizLegend Whether to display the legend horizontally. Default is
##' TRUE.
##' @param ncolLegend Number of columns used in legend. Default is 1 but is
##' overidden if horizLegend is set to TRUE.
##' @param xInterspLegend character interspacing factor for horizontal legend.
##' Default is 1.
##' @param insetLegend inset distance(s) from the margins. Default is c(0, -0.3)
##' @param upperMar top margin height. Default is 5.
##' @param minMaxLabels character vector of length 2 for left and right extremes
##' of the plot. Default is c("Worst", "Best"). Alternative could be
##' c("Lowest", "Highest") with lowIsGood all set to FALSE.
##' @param minLabelXpos determines where on the plot to place the min label.
##' Used in mtext. Default is -0.07
##' @param maxLabelXpos determines where on the plot to place the max label.
##' Used in mtext. Default is 1.07
##' @param avgLabel Currently the function is only set up to calculate the mean
##' of the numeric vectors (even if they are rates or percentages with different
##' denominators. A future development should be to give additional options of
##' a vector of weighted means and/or to calculate the median. Default is "Mean
##' average"
##' @author Mark Chambers mark.chambers@@medway.gov.uk
##' @export
##' @return Does not return anything at the moment. Creates a plot as a side-effect.
##' @examples
##' attach(mtcars)
##' x <- mtcars$wt
##' y <- mtcars$mpg
##'
##' df1 <- data.frame(var1 = x, var2 = y)
##' df2 <- data.frame(var1 = x, var2 = y, var3 = 1:length(wt), var4 = x, abcdefghijklmno = mpg)
##' df3 <- data.frame(var1 = x, var2 = y, var3 = 1:length(wt), var4 = x, var5 = y, var6 = x, var7 = y, var8 = 1:length(x), var9 = x, abcdefghijklmno = y)
##'
##' percentile_plot(df3, pick = 5, lowIsGood = c(rep(FALSE, 9), TRUE))
##'
##' percentile_plot(df1, pick = 5)
percentile_plot <- function(data,
pick = 1,
additionalProbs = c(0.05, 0.95),
lowIsGood = rep(FALSE, ncol(data)),
localValueCex = (20/ncol(data)),
localValueLabel = "Local\nvalue",
localValueLabelXpos = -0.23,
plotLabels = colnames(data),
cexLabels = 1,
plotTitle = "Percentile plot",
plotTitleXpos = -0.4,
plotTitleYpos = 2,
plotTitleCex = 1.5,
horizLegend = TRUE,
ncolLegend = 1,
xInterspLegend = 1,
insetLegend = c(0, -0.3),
upperMar = 5,
minMaxLabels = c("Worst", "Best"),
minLabelXpos = -0.07,
maxLabelXpos = 1.07,
avgLabel = "Mean\naverage") {
# Data traps
# pick must be an integer between 1 and nrow(data)
stopifnot(pick > 0 & pick <= nrow(data))
# x must be a matrix or dataframe
stopifnot(class(data) %in% c("matrix", "data.frame"))
# Additional probabilities must be greater than zero and less than 1
stopifnot(additionalProbs > 0 & additionalProbs < 1)
stopifnot(length(additionalProbs) == 2)
# Store number of variables to setup empty plot and cycle through in for loop
numVariables <- ncol(data)
# Determine necessary left margin par settings to accommodate longer labels
leftMar <- ceiling(max(nchar(plotLabels), na.rm = TRUE) / (1.5 / cexLabels))
# Setup an empty plotting window
op <- par(mar = c(5, leftMar, upperMar, 2) + 0.1, xpd = TRUE)
plot(c(0, 1), c(0, numVariables),
type = "n",
xlim = c(-0.1, 1.1),
xaxt = "n",
yaxt = "n",
xlab = "",
ylab = "",
bty = "n")
axis(side = 2,
labels = plotLabels,
at = (1:numVariables) - 0.5,
las = 2,
tick = FALSE,
line = 3,
hadj = 1,
cex.axis = cexLabels)
# Annotation
mtext(avgLabel, side = 1, line = 1, at = 0.5, cex = 0.8, font = 1)
mtext(minMaxLabels[1], side = 1, line = 1, at = minLabelXpos, cex = 0.8, font = 1)
mtext(minMaxLabels[2], side = 1, line = 1, at = maxLabelXpos, cex = 0.8, font = 1)
mtext(localValueLabel, side = 1, line = 1, at = localValueLabelXpos, cex = 0.8, font = 1)
# Legend
prob1 <- paste0(additionalProbs[1] * 100, "%")
prob2 <- paste0(additionalProbs[2] * 100, "%")
legend("topright",
c(prob1, "Range", localValueLabel, "Middle 50%", prob2),
pch = c(NA, NA, 16, NA, NA),
fill = c(NA, "lightgrey", NA, "grey", NA),
border = NA,
col = c("red", NA, "blue", NA, "darkgreen"),
lty = c(1, NA, NA, NA, 1),
horiz = horizLegend,
ncol = ncolLegend,
bty = "o",
box.col = "grey",
inset = insetLegend,
cex = 0.8,
x.intersp = xInterspLegend)
# Title
mtext(text = plotTitle,
side = 3,
line = plotTitleYpos,
at = plotTitleXpos,
cex = plotTitleCex)
# Cycle through the variables in data to plot bars etc
for (i in 1:numVariables) {
x <- data[, i]
# Store values for later processing
xMin <- min(x, na.rm = TRUE)
xMax <- max(x, na.rm = TRUE)
xMean <- mean(x, na.rm = TRUE)
xBtmQuartile <- quantile(x, probs = 0.25, na.rm = TRUE)
xTopQuartile <- quantile(x, probs = 0.75, na.rm = TRUE)
tmp <- quantile(x, probs = additionalProbs, na.rm = TRUE)
# names(tmp) <- gsub("%", "", names(tmp))
xAddProb1 <- tmp[1]
xAddProb2 <- tmp[2]
# Determine worst and best values
worst <- xMin
best <- xMax
if(lowIsGood[i] == TRUE) {
worst <- xMax
best <- xMin
}
# Centre data on mean
dist <- max(c(xMean - xMin), (xMax - xMean))
xScaleMin <- xMean - dist
xScaleMax <- xMean + dist
# Determine worst and best scale values
ScaleWorst <- xScaleMin
ScaleBest <- xScaleMax
if(lowIsGood[i] == TRUE) {
ScaleWorst <- xScaleMax
ScaleBest <- xScaleMin
}
# Chart data
value <- (x[pick] - ScaleWorst) / (ScaleBest - ScaleWorst)
plotWorst <- (worst - ScaleWorst) / (ScaleBest - ScaleWorst)
btmQuartile <- (xBtmQuartile - ScaleWorst) / (ScaleBest - ScaleWorst)
topQuartile <- (xTopQuartile - ScaleWorst) / (ScaleBest - ScaleWorst)
plotMean <- 0.5
plotBest <- (best - ScaleWorst) / (ScaleBest - ScaleWorst)
plotAddProb1 <- (xAddProb1 - ScaleWorst) / (ScaleBest - ScaleWorst)
plotAddProb2 <- (xAddProb2 - ScaleWorst) / (ScaleBest - ScaleWorst)
if(lowIsGood[i] == TRUE) {
plotAddProb1 <- (xAddProb2 - ScaleWorst) / (ScaleBest - ScaleWorst)
plotAddProb2 <- (xAddProb1 - ScaleWorst) / (ScaleBest - ScaleWorst)
}
# Plot the data
y1 <- i + 0.1 - 1
y2 <- i + 0.9 - 1
doty <- i - 0.5
rect(plotWorst, y1, plotBest, y2, col = "lightgrey", border = NA)
rect(btmQuartile, y1, topQuartile, y2, col = "grey", border = NA)
segments(x0 = 0.5, y0 = y1, x1 = 0.5, y1 = y2, lwd = 2)
segments(x0 = plotAddProb1, y0 = y1, x1 = plotAddProb1, y1 = y2, lwd = 2, col = "red")
segments(x0 = plotAddProb2, y0 = y1, x1 = plotAddProb2, y1 = y2, lwd = 2, col = "darkgreen")
points(value, doty, pch = 16, col = "blue", cex = localValueCex)
# Annotation
mtext(x[pick], side = 2, line = 1, at = doty, las = 2, cex = 0.8, col = "blue")
text(-0.02, doty, worst, cex = 0.8, pos = 2)
text(1.01, doty, best, cex = 0.8, pos = 4)
}
par(op)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.