#' @rdname ggslopegraph
#' @title Create Slopegraph from a data frame using ggplot2
#' @description Convert an R data frame (containing a panel dataset, where rows are observations and columns are time periods) into an Edward Tufte-inspired Slopegraph using ggplot2
#' @param data An observation-by-period data.frame, with at least two columns. Missing values are allowed.
#' @param main A character string specifying a title. Passed to \code{\link[ggplot2]{ggtitle}}.
#' @param xlab A character string specifying an x-axis label. Passed to \code{\link[ggplot2]{scale_x_continuous}}.
#' @param ylab A character string specifying an y-axis label. Passed to \code{\link[ggplot2]{scale_y_continuous}}, or \code{\link[ggplot2]{scale_y_reverse}} if \code{yrev = TRUE}.
#' @param xlabels The labels to use for the slopegraph periods. Default is \code{names(data)}.
#' @param xlim A two-element numeric vector specifying the y-axis limits.
#' @param ylim A two-element numeric vector specifying the y-axis limits.
#' @param labpos.left A numeric value specifying the x-axis position of the left-side observation labels. If \code{NULL}, labels are omitted.
#' @param labpos.right A numeric value specifying the x-axis position of the right-side observation labels. If \code{NULL}, labels are omitted.
#' @param leftlabels The parameter for the rightside observation labels. Default is using row indexes.
#' @param rightlabels The parameter for the rightside observation labels. Default is using row indexes.
#' @param xbreaks Passed to \code{breaks} in \code{\link[ggplot2]{scale_x_continuous}}.
#' @param ybreaks Passed to \code{breaks} in \code{\link[ggplot2]{scale_y_continuous}}.
#' @param yrev A logical indicating whether to use \code{\link[ggplot2]{scale_y_reverse}} rather than the default \code{\link[ggplot2]{scale_y_continuous}}.
#' @param decimals The number of decimals to display for values in the plot. Default is \code{0} (none).
#' @param col.lines A vector of colors for the slopegraph lines. Default is \code{par('fg')}.
#' @param col.lab A vector of colors for the observation labels. Default is \code{par('fg')}.
#' @param col.num A vector of colors for the number values. Default is \code{par('fg')}. If \code{NA}, labels are not drawn.
#' @param lwd A vector of line width values for the slopegraph lines.
#' @param offset.x A small offset for \code{segments}, to be used when positioning the numeric values. Default is \code{NULL} (set automatically based on the data.
#' @param cex.lab A numeric value indicating the size of row labels. Default is \code{3}. See \code{\link[ggplot2]{geom_text}}.
#' @param cex.num A numeric value indicating the size of numeric labels. Default is \code{3}. See \code{\link[ggplot2]{geom_text}}.
#' @param na.span A logical indicating whether line segments should span periods with missing values. The default is \code{FALSE}, such that some segments are not drawn.
#' @details A slopegraph is an interesting visualization because it involves the representation of a simple observation-by-period matrix of data values as a plot but the production of that visualization entails a number of data transformations that are not immediately obvious from the visual simplicity of the graph itself.
#'
#' Specifically, a slopegraph involves three distinct visual components, each of which must be drawn using a slightly different data structure. Those elements are: (1) the observation labels, (2) the numeric value labels of each observation-period data point, and (3) the line segments connecting the numeric labels. To draw these three elements requires transforming the input into three different data structures.
#'
#' First, to draw the observation labels requires constructing a new data frame containing the observation labels (from the input data frame's \code{rownames} attribute), the constant x-left and x-right label positions, and the vertical positions of the left- and right-side labels.
#'
#' Second, to draw the numeric value labels requires creating a \dQuote{tidy} data frame based upon the positions of the values in the input data frame. Specifically, a tidy representation of the data is a two-column data frame containing: (1) the column value of each data point (identified by \code{\link[base]{col}}) to specify horizontal position, and (2) the value of the data point itself which is also its vertical position. This consists of a basic wide-to-long reshape procedure (using \code{\link[stats]{reshape}}).
#'
#' Third, to draw the line segments requires creating a \dQuote{tidy} data frame that consists of one row for each segment, by identifying row-adjacent values and identifying variables for x1 and x2 and y1 and y2 end-points of each segment. Another \dQuote{row} identifying variable is needed to relationally map this data frame back to the original observations (e.g., to color the segments). This step is performed by \code{\link{segmentize}}.
#'
#' @return A \code{\link[ggplot2]{ggplot}} object.
#' @examples
#' require("ggplot2")
#' ## Tufte's Cancer Graph (to the correct scale)
#' data(cancer)
#' ggslopegraph(cancer, col.lines = 'gray',
#' xlabels = c('5 Year','10 Year','15 Year','20 Year'))
#'
#' ## Tufte's GDP Graph
#' data(gdp)
#' ggslopegraph(gdp, col.line='gray', xlabels = c('1970','1979'),
#' main = 'Current Receipts of Goverment\nas a Percentage of Gross Domestic Product') +
#' theme_bw()
#'
#' ## Ranking of U.S. State populations
#' data(states)
#' ggslopegraph(states,
#' main = 'Relative Rank of U.S. State Populations, 1790-1870',
#' yrev = TRUE)
#'
#' cls <- rep("black", nrow(states))
#' cls[rownames(states) == "South Carolina"] <- "red"
#' cls[rownames(states) == "Tennessee"] <- "blue"
#' ggslopegraph(states, main = 'Relative Rank of U.S. State Populations, 1790-1870',
#' yrev = TRUE, col.lines = cls, col.lab = cls)
#'
#' ## ranking of U.S. Bachelors Degrees fields
#' data(bachelors)
#' bachelors[] <- lapply(bachelors, function(x) rank(x))
#' names(bachelors) <- substring(names(bachelors), 3, 7)
#' ggslopegraph(bachelors, offset.x = 0, xlim = c(1, 25), col.num = NA, labpos.left = NULL)
#'
#' @seealso For a base graphics version, use \code{\link{slopegraph}}.
#' @import ggplot2
#' @importFrom stats reshape
#' @export
ggslopegraph <-
function(data,
main = NULL,
xlab = "",
ylab = "",
xlabels = names(data),
xlim = c(-1L,ncol(data)+2L),
ylim = range(data, na.rm = TRUE),
labpos.left = 0.8,
labpos.right = ncol(data) + 0.2,
leftlabels = NULL,
rightlabels = NULL,
xbreaks = seq_along(xlabels),
ybreaks = NULL,
yrev = ylim[1] > ylim[2],
decimals = 0L,
col.lines = "black",
col.lab = "black",
col.num = "black",
lwd = 0.5,
offset.x = NULL,
cex.lab = 3L,
cex.num = 3L,
na.span = FALSE)
{
# check decimal formatting
fmt <- paste0("%0.", decimals, "f")
# check data
if (ncol(data) < 2) {
stop("'data' must have at least two columns")
}
data[] <- lapply(data, round, decimals)
# segmentize
to_draw <- segmentize(data, na.span = na.span)
# reshape for printing numeric value labels
long <- reshape(data, direction = "long", varying = names(data), v.names = "value", sep = "")
# expand formatting arguments
if (length(col.lines) == 1) {
col.lines <- rep(col.lines, nrow(data))
}
if (length(lwd) == 1) {
lwd <- rep(lwd, nrow(data))
}
if (length(col.num) == 1) {
col.num <- rep(col.num, nrow(data))
}
if (length(col.lab) == 1) {
col.lab <- rep(col.lab, nrow(data))
}
col.num <- col.num[long[["id"]]]
col.lines <- col.lines[to_draw[["row"]]]
lwd <- lwd[to_draw[["row"]]]
# draw
g <- ggplot() +
# x-axis labels
scale_x_continuous(name = xlab, breaks = xbreaks,
labels = xlabels, limits = xlim) +
# title
ggtitle(main) +
if (isTRUE(yrev)) {
scale_y_reverse(name = ylab, breaks = ybreaks, labels = NULL, limits = rev(ylim))
} else {
scale_y_continuous(name = ylab, breaks = ybreaks, labels = NULL, limits = ylim)
}
if (is.null(offset.x)) {
offset.x <- (max(nchar(sprintf(fmt, long[["value"]]))) + 0.02)/2L
}
# segments
g <- g + geom_segment(aes(x = x1 + offset.x,
y = ifelse(y1 == y2, y1, (y1+((y2-y1)*offset.x))),
xend = x2 - offset.x,
yend = ifelse(y1 == y2, y2, (y2-((y2-y1)*offset.x)))),
col = col.lines, size = lwd,
data = to_draw, inherit.aes = FALSE) + guides(fill = FALSE) +
# numeric value labels
geom_text(aes(x = time, y = bump_overlaps(value), label = sprintf(fmt, value)), color = col.num,
data = long, inherit.aes = FALSE,
size = cex.num, hjust = 0.5)
if (is.null(leftlabels)) {
leftlabs <- data[!is.na(data[,1]), 1, drop = FALSE]
} else {
leftlabs <- leftlabels
}
if (is.null(rightlabels)) {
which_right <- data[!is.na(data[,ncol(data)]), ncol(data), drop = FALSE]
} else {
which_right <- rightlabels
}
# left-side row labels
if (!is.null(labpos.left)) {
g <- g + geom_text(aes(x = labpos.left, y = bump_overlaps(leftlabs[,1]),
label = rownames(leftlabs)),
color = col.lab[!is.na(data[,1])],
data = NULL, inherit.aes = FALSE, size = cex.lab, hjust = 1L)
}
# right-side row labels
if (!is.null(labpos.right)) {
g <- g + geom_text(aes(x = labpos.right, y = bump_overlaps(which_right[,1]),
label = rownames(which_right)),
color = col.lab[!is.na(data[,ncol(data)])],
data = NULL, inherit.aes = FALSE, size = cex.lab, hjust = 0L)
}
return(g + theme(legend.position="none") + guides(fill = FALSE))
}
globalVariables(c("x1", "y1", "x2", "y2", "time", "value"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.