Nothing
#' Variations of parallel coordinate plots
#'
#' \code{\link{ggparallel}} implements and combines different types of parallel
#' coordinate plots for categorical data: hammock plots, parallel sets plots,
#' common angle plots, and common angle plots with a hammock-like adjustment
#' for line widths.
#'
#' Parallel sets have been suggested by \cite{kosara:2006} as a visualization
#' technique to incorporate categorical variables into a parallel coordinate
#' plot introduced by \cite{wegman:1990} and \cite{inselberg:1985}. The parallel
#' sets implemented here are reduced to representations of neighboring
#' two-dimensional relationships only rather than the hierarchical version
#' originally suggested.
#'
#' Both versions, however, show perceptual problems with interpreting line
#' widths, leading to potentially wrong conclusions about the data. The hammock
#' display, introduced by \cite{schonlau:2003}, and the common angle plots are
#' two approaches at fixing this problem: in Hammock plots the linewidth is
#' adjusted by a factor countering the strength of the illusion, in the common
#' angle plot all lines are adjusted to show the same angle - making line widths
#' again comparable across ribbons.
#'
#' Additionally, we can also adjust ribbons in the common angle display for the
#' angle, to make them appear having the same width (or height) across the
#' display. We refer to this method as \code{adj.angle}.
#'
#' @param vars list of variable names to be included in the plotting. Order of
#' the variables is preserved in the display
#' @param data data frame
#' @param weight weighting variable - use character string
#' @param method plotting method to use - one of \code{angle},
#' \code{adj.angle}, \code{parset}, or \code{hammock}, for a hammock plot
#' the aspect ratio needs to be fixed.
#' @param alpha level of alpha blending for the fill color in ribbons, value
#' has to be between 0 and 1, defaults to 0.5.
#' @param width width of variables
#' @param order flag variable with three levels -1, 0, 1 for levels in
#' decreasing order, levels in increasing order and levels unchanged. This
#' variable can be either a scalar or a vector
#' @param ratio used for methods with angle adjustments (method =
#' \code{'hammock', 'adj.angle'}): specifies the height (width for horizontal
#' displays) of the widest line as ratio of the overall display height (width
#' for horizontal displays).
#' @param label binary variable (vector), whether labels should be shown.
#' @param label.size numeric value to determine the size in which labels are shown, defaults to 4
#' @param label.colour character of colour in which the label should be shown. Ignored, if `label` is FALSE.
#' @param text.angle numeric value to determine the angle under which labels are shown.
#' @param text.offset (vector) of values for offsetting the labels
#' @param asp aspect ratio of the plot - it will be set to a default of 1 in
#' the case of hammock plots.
#' @param same.level are all variables using the same levels? If yes, simplify the labelling
#' @param ... passed on directly to all of the ggplot2 commands
#' @return returns a ggplot2 object that can be plotted directly or used as base
#' layer for additional modifications.
#' @export
#' @import ggplot2 plyr reshape2
#' @example inst/examples/ggparallel-ex.R
ggparallel <- function(vars=list(), data, weight=NULL, method="angle",
alpha=0.5, width = 0.25, order = 1, ratio=NULL,
asp = NULL, label = TRUE, label.colour="grey90", label.size=4, text.angle=90,
text.offset=NULL, same.level=FALSE, ...) {
### error checking
vars <- unlist(vars)
k = length(vars)
if (k < 2) message("Error: ggparallel needs at least two variables. Use vars=list('X', 'Y')")
## if user doesn't specify the weight, assign value of 1.
data$weight <- weight
if (is.null(weight)) data$weight <- 1
if (is.character(weight)) data$weight <- data[,weight]
if (is.null(ratio)) ratio <- nrow(data)/sum(data$weight)
## if ordering is selected, organize x and y axis by weight
## make order a vector of length length(vars)
order <- rep(order, length=length(vars))
for (i in 1:length(vars)){
if (! is.factor(data[,vars[i]]))
data[,vars[i]] <- factor(data[,vars[i]])
if (order[i] != 0)
data[,vars[i]] <- stats::reorder(data[,vars[i]], data$weight,
function(x) if (order[i] > 0) sum(x)
else -sum(x)
)
}
llist <- NULL
for (i in unique(vars)) {
if (!same.level) levels(data[,i]) <- paste(i, levels(data[,i]), sep=":")
llist <- c(llist, levels(data[,i]))
}
if ((method=="hammock"))# | (method=="adj.angle"))
if (is.null(asp)) asp <- 1
## helper function
getRibbons <- function(xid,yid) {
## get the names of the x and y variables
x <- vars[xid]
y <- vars[yid]
xname <- x
yname <- y
## introduce new variables as fail-safe, if local binding fails:
variable <- NULL
value <- NULL
Freq <- NULL
Nodeset <- NULL
tangens <- NULL
dx2 <- NULL
midx <- NULL
midy <- NULL
ypos <- NULL
varn <- NULL
ymax <- NULL
ymin <- NULL
ymid <- NULL
xoffset <- NULL
## create the data table, x, y, and weight
dfxy <- as.data.frame(stats::xtabs(data$weight~data[,x] + data[,y]))
dfxy <- subset(dfxy, Freq > 0)
names(dfxy)[1:2] <- c(xname, yname)
## get the ordering for data according to x-axis categories
idx <- order(dfxy[,x], dfxy[,y], decreasing = TRUE)
## find the position of X-axis connector
dfxy$X[idx] <- sum(dfxy$Freq[idx]) - cumsum(dfxy$Freq[idx])
dfxy$X[idx] <- dfxy$X[idx] + dfxy$Freq[idx]
## get the ordering for data according to y-axis categories
idx <- order(dfxy[,y], dfxy[,x], decreasing = TRUE)
## find the position of the Y-axis connector
dfxy$Y[idx] <- sum(dfxy$Freq[idx]) - cumsum(dfxy$Freq[idx])
dfxy$Y[idx] <- dfxy$Y[idx] + dfxy$Freq[idx]
## assign row number as id
dfxy$id <- 1:nrow(dfxy)
dfm <- melt(dfxy, measure.var=c("X", "Y"))
levels(dfm$variable) <- c(x,y)
dfxy$XX <- dfxy[,xname]
dfxy$YY <- dfxy[,yname]
dfm$Nodeset <- dfm[,xname]
dfm$Nodeset <- factor(dfm$Nodeset, levels=llist)
dfm$xoffset <- c(width/2,-width/2)[as.numeric(dfm$variable)]
dfm$xid <- xid - 1
dfm$yid <- yid
if (method=="parset") {
r <- geom_ribbon(aes(x=as.numeric(variable)+xoffset+xid,
ymin=value-Freq,
ymax= value, group=id,
fill=Nodeset, colour=Nodeset), alpha=alpha, data=dfm)
}
if (method == "angle") {
dfm$x <- with(dfm, as.numeric(variable)+xoffset+xid)
dfm<- ddply(dfm, .(id), transform,
dx=max(x)-min(x),
dy=max(value) -min(value)
)
dfm$tangens = dfm$dy/dfm$dx
maxslope <- 1.3*max(dfm$tangens) # add 15% of offset on each end of each variable
dfm$newdx <- with(dfm, dy/maxslope)
dfm2 <- dfm
dfm2$xoffset <- with(dfm, (abs(xoffset) + (dx-newdx)/2) * sign(xoffset))
dfm2$x <- with(dfm2, as.numeric(variable)+xoffset+xid)
dfm3 <- ddply(dfm2, names(dfm2)[2], transform,
dx2 = max(x[which(tangens==max(tangens))])
)
dfm3 <- ddply(dfm3, .(id), transform, shiftx = max(x)-dx2)
dfm3$x <- dfm3$x - dfm3$shiftx
dfm <- rbind(dfm, dfm3[,-(16:17)])
r <- geom_ribbon(aes(x=x,ymin=value -Freq, ymax= value, group=id,
fill=Nodeset, colour=Nodeset), alpha=alpha, data=dfm)
}
if (method == "adj.angle") {
dfm$x <- with(dfm, as.numeric(variable)+xoffset+xid)
dfm<- ddply(dfm, .(id), transform,
dx=max(x)-min(x),
dy=max(value) -min(value)
)
dfm$tangens = dfm$dy/dfm$dx
maxslope <- 1.3*max(dfm$tangens) # add 15% of offset on each end of each variable
dfm$newdx <- with(dfm, dy/maxslope)
dfm2 <- dfm
dfm2$xoffset <- with(dfm, (abs(xoffset) + (dx-newdx)/2) * sign(xoffset))
dfm2$x <- with(dfm2, as.numeric(variable)+xoffset+xid)
dfm3 <- ddply(dfm2, names(dfm2)[2], transform,
dx2 = max(x[which(tangens==max(tangens))])
)
dfm3 <- ddply(dfm3, .(id), transform, shiftx = max(x)-dx2)
dfm3$x <- dfm3$x #- dfm3$shiftx
dfm <- rbind(dfm, dfm3[,-(16:17)])
dfm <- transform(dfm, ymin=value-Freq, ymax=value)
dfm <- transform(dfm, ymid=(ymax+ymin)/2)
# plot.asp <- length(vars)/(1.1*sum(data$weight))*asp
# qplot(x, ymid, data=dfm, geom=c("line"), alpha=I(0.5), group=id, colour=factor(gear), size=Freq)+scale_size(range=4.2*c(min(dfm$Freq),max(dfm$Freq))) + scale_colour_discrete() + theme(legend.position="none") + ylim(c(0, 1.05*sum(data$weight)))
#browser()
r <- list(geom_line(
aes(x=x,y=ymid, group=id, colour=Nodeset, linewidth=Freq),
alpha=alpha, data=dfm), range=c(min(dfm$Freq),max(dfm$Freq)))
# r <- list(geom_line(aes(x=x,y=ymid, group=id, colour=Nodeset, size=Freq), alpha=alpha, data=dfm),
# scale_size(guide="none", range=ratio*max(dfm$Freq)*c(min(dfm$Freq),max(dfm$Freq)))) #+ scale_colour_discrete()
}
if (method=="hammock") {
maxwidth = ratio/2*sum(data$weight)
xtab <- ddply(dfxy, xname, summarise, value=sum(Freq))
xtab$midx <- with(xtab, cumsum(value)- value/2)
dfm <- merge(dfm, xtab[,c(xname, "midx")], by=xname)
ytab <- ddply(dfxy, yname, summarise, value=sum(Freq))
ytab$midy <- with(ytab, cumsum(value)- value/2)
dfm <- merge(dfm, ytab[,c(yname, "midy")], by=yname)
plot.asp <- length(vars)/(1.1*sum(data$weight))*asp
dfm$varn <- as.numeric(dfm$variable)
dfm <- transform(dfm,
x = min(varn+xoffset+xid),
xend = max(varn+xoffset+xid)
)
dfm <- ddply(dfm , .(id), transform,
tangens = max(midy)-min(midx)
)
dfm$tangens <- with(dfm, tangens/max(xend-x)*plot.asp)
dfm$width <- with(dfm, Freq/cos(atan(tangens)))
dfm$width <- with(dfm, width*maxwidth/max(width))
dfm <- ddply(dfm, .(id), transform,
y=c(midx[1], midy[1])[varn]
)
r <- geom_ribbon(aes(x=as.numeric(variable)+xoffset+xid,
ymin=y-width, ymax=y+width, group=id,
fill=Nodeset, colour=Nodeset), alpha=alpha, data=dfm) #, drop=FALSE)
}
r
}
## end helper function
## local variables
variable <- NULL
Freq <- NULL
Nodeset <- NULL
ypos <- NULL
gr <- list()
for (i in 1:(length(vars)-1))
gr[[i]] <- getRibbons(i,i+1)
if (method=="adj.angle") {
prange <- c(NA,NA)
for (i in 1:(length(vars)-1)) {
prange <- range(c(prange, gr[[i]][[2]]), na.rm=T)
gr[[i]] <- gr[[i]][[1]]
}
gr[[1]] <- list(gr[[1]], scale_size(guide="none", range=ratio*prange))
}
subdata <- data[,c("weight", unlist(vars))]
for (i in unlist(vars)) subdata[,i] <- as.character(subdata[,i])
dfm <- melt(subdata, id.var="weight")
names(dfm)[3] <- "Nodeset"
dfm$Nodeset <- factor(dfm$Nodeset, levels=llist)
llabels <- NULL
if (label) {
label.stats <- ddply(dfm, .(variable, Nodeset), summarize,
n = length(weight),
weight=sum(weight)
)
maxWeight <- sum(label.stats$weight)/length(unique(label.stats$variable))
label.stats$ypos <- cumsum(label.stats$weight)-(as.numeric(label.stats$variable)-1)*maxWeight
label.stats$ypos <- label.stats$ypos-label.stats$weight/2
if (is.null(text.offset)) text.offset <- 0
label.stats$text.offset <- rep(text.offset, length=nrow(label.stats))
varnames <- paste(unlist(vars), sep="|", collapse="|")
label.stats$labels <- gsub(sprintf("(%s):(.*)",varnames),"\\2", as.character(label.stats$Nodeset))
llabels <- list(#geom_text(aes(x=as.numeric(variable)+text.offset, y=ypos, label=labels),
# colour = "grey20", data=label.stats, angle=text.angle, size=label.size),
geom_text(aes(x=as.numeric(variable)+0.01+text.offset, y=ypos-0.01, label=labels),
colour = label.colour, data=label.stats, angle=text.angle, size=label.size))
}
theme.layer <- NULL
if (!is.null(asp)) theme.layer <- theme(aspect.ratio=asp)
dfm$Nodeset <- factor(dfm$Nodeset, levels = rev(levels(dfm$Nodeset)))
ggplot() + xlab("") + gr + theme.layer +
geom_bar(aes(weight=weight, x=variable, fill=Nodeset, colour=Nodeset), width=width, data=dfm) +
llabels +
scale_x_discrete(expand=c(0.1, 0.1))
# theme(drop=FALSE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.