# Routines for manipulating colors and making color plots
#' Color schemes
#'
#' Compute a color scheme with a specified number of levels.
#'
#' The first two schemes are \emph{categorical}, providing maximum separation
#' in hue, intended for depicting unordered categories. \code{default.colors}
#' has only dark colors, good for coloring points, while
#' \code{default.colors.w} includes light colors, good for filling regions.
#'
#' The next four schemes are \emph{sequential}, from light to dark, with
#' variation in hue to increase discrimination. They are intended for
#' depicting ordered levels. The sequential order is more easily perceived
#' with these schemes than with the built-in palettes \code{heat.colors},
#' \code{terrain.colors}, and \code{topo.colors}. The ordering can also be
#' seen by the color-blind and when printed in black and white.
#'
#' The main difference between the sequential schemes is the variation in hue,
#' with \code{YR.colors} having the most variation and \code{gray.colors}
#' having the least. Generally you should choose the amount of variation
#' according to the number of levels. I recommend \code{OrRd.colors} for three
#' levels, \code{YlGnBu.colors} for four to eight levels, and \code{YR.colors}
#' beyond eight levels.
#'
#' The last four schemes are \emph{double-ended} or \emph{diverging} schemes,
#' which progress from one hue to a second hue, passing through white in the
#' middle. They are intended for representing signed ordered levels, such as
#' residuals. The main difference between them is the amount of separation
#' between colors, so generally you use \code{GM.colors} when you want a few
#' levels and \code{RYB.colors} when you want many.
#'
#' These functions can be used as the \code{color.palette} parameter to
#' \code{\link{filled.contour}} and \code{\link{color.plot}}, for example.
#'
#' @aliases default.colors default.colors.w YR.colors YlGnBu.colors OrRd.colors
#' gray.colors RYB.colors BrBg.colors RC.colors GM.colors
#' @param n the number of colors desired
#' @return A vector of strings, naming colors.
#' @author Tom Minka
#' @seealso \code{\link{colors}},\code{\link{rainbow}},\code{\link{color.cone}}
#' @references The schemes in \code{YlGnBu.colors}, \code{OrRd.colors},
#' \code{RYB.colors}, and \code{BrBg.colors} are from ColorBrewer. The scheme
#' in \code{YR.colors} is from Howard Seltman.
#'
#' Mark A. Harrower and Cynthia A. Brewer. ColorBrewer: An Online Tool for
#' Selecting Color Schemes for Maps, \emph{The Cartographic Journal}, in press.
#' \url{http://www.colorbrewer.org/},
#' \url{http://www.personal.psu.edu/faculty/c/a/cab38/ColorBrewerBeta.html}
#'
#' Generalized color schemes for Mapping and Visualization. From Cynthia
#' Brewer, Color Use Guidelines for Mapping and Visualization. Reprinted at
#' the Gallery of Data Visualization by Michael Friendly.
#' \url{http://www.math.yorku.ca/SCS/Gallery/images/S12-fullstructureClean.gif}
#'
#' Dan Carr. Color perception, the importance of gray and residuals, on a
#' choropleth map. \emph{Statistical Computing & Graphics Newsletter}
#' 5(1):16-20, 1994
#' \url{http://cm.bell-labs.com/cm/ms/who/cocteau/newsletter/issues/back/v51.pdf}
#' @examples
#'
#' data(Housing)
#' color.plot(Price ~ Rooms + Low.Status, Housing, bg=gray(0.5),
#' color.palette=YlGnBu.colors)
#' color.plot(Price ~ Rooms + Low.Status, Housing, bg=gray(0.5),
#' color.palette=YR.colors)
#' color.plot(Price ~ Rooms + Low.Status, Housing, bg=gray(0.5),
#' color.palette=RYB.colors,nlevels=5)
#'
#' # also see examples for color.cone
#' @export
default.colors <- function(n) ((0:(n-1))%%6+1)
# intended for white background
gray.colors <- function(n) gray(rev(0:(n-1))/n)
# good for objects which are being outlined (e.g. pies)
default.colors.w <- function(n) c(8,seq(len=n-1))
# these are from
# http://www.personal.psu.edu/faculty/c/a/cab38/ColorBrewerBeta2.html
OrRd.colors <- function(n) {
if(n == 7) c("#fef0d9","#fdd49e","#fdbb84","#fc8d59","#ef6548","#d7301f","#99000d")
else if(n == 6) c("#fef0d9","#fdd49e","#fdbb84","#fc8d59","#e34a33","#b2000d")
else if(n == 5) c("#fef0d9","#fdcc8a","#fc8d59","#e34a33","#b2000d")
else if(n == 4) c("#fef0d9","#fdcc8a","#fc8d59","#d7301f")
else if(n == 3) c("#fee8c8","#fdbb84","#e34a33")
else if(n == 2) c("#fdbb84","#e34a33")
else stop("can't make that many levels")
}
#' @export
YlGnBu.colors <- function(n) {
if(n == 8) c("#ffffd9","#edf8b0","#c7e9b4","#7fcdbb","#41b6c3","#1d91c0","#225ea8","#0c2c83")
else if(n == 7) c("#ffffcc","#c7e9b4","#7fcdbb","#41b6c3","#1d91c0","#386cb0","#0c2c83")
else if(n == 6) c("#ffffcc","#c7e9b4","#7fcdbb","#41b6c3","#2c7fb8","#253494")
else if(n == 5) c("#ffffcc","#a1dab4","#41b6c3","#2c7fb8","#253494")
else if(n == 4) c("#ffffcc","#a1dab4","#41b6c3","#386cb0")
else if(n == 3) c("#edf8b0","#7fcdbb","#2c7fb8")
else if(n == 2) c("yellow","blue")
else stop("can't make that many levels")
}
# diverging
#' @export
BrBg.colors <- function(n) {
if(n == 7) c("#8c510a","#d8b365","#f6e8c3","#f5f5f5","#c7eae5","#5ab4ac","#01665e")
else if(n == 6) c("#8c510a","#d8b365","#f6e8c3","#c7eae5","#5ab4ac","#01665e")
else if(n == 5) c("#a6611a","#dfc27d","#f5f5f5","#80cdc1","#018571")
else if(n == 4) c("#a6611a","#dfc27d","#80cdc1","#018571")
else if(n == 3) c("#d8b365","#f5f5f5","#5ab4ac")
else if(n == 2) c("#d8b365","#5ab4ac")
else stop("can't make that many levels")
}
#' @export
RYB.colors <- function(n) {
if(n == 7) c("#d73d29","#fc8d59","#fee090","#ffffbf","#e0f3f8","#91bfdb","#4575b4")
else if(n == 6) c("#d73d29","#fc8d59","#fee090","#e0f3f8","#91bfdb","#4575b4")
else if(n == 5) c("#ca373b","#fdae61","#ffffbf","#abd9e9","#2c7bb6")
else if(n == 4) c("#ca373b","#fdae61","#abd9e9","#2c7bb6")
else if(n == 3) c("#fc8d59","#ffffbf","#91bfdb")
else if(n == 2) c("red","blue")
else stop("can't make that many levels")
}
# sequential WYRK
#' @export
YR.colors <- function(k,lt=0.97,dk=0.03) {
r <- c(lt, rep(lt,k),
rep(lt,k),
seq(lt,dk,len=k+1)[-1])
g <- c(lt, rep(lt,k),
seq(lt,dk,len=k+1)[-1],
rep(dk,k))
b <- c(lt, seq(lt,dk,len=k+1)[-1],
rep(dk,k),
rep(dk,k))
i <- seq(1,length(r),len=k)
rgb(r[i],g[i],b[i])
}
# diverging
#' @export
diverging.colors <- function(n,hmax=0,hmin=(0.5+hmax)%%1,v=1) {
if(n %% 2 == 1) {
# odd case
k <- n%/%2
x <- seq(1,0,len=k+1)
c(hsv(h = hmin, s = x, v),
hsv(h = hmax, s = rev(x[1:k]), v))
} else {
# even case
k <- n%/%2
x <- seq(1,-1,len=n)[1:k]
c(hsv(h = hmin, s = x, v),
hsv(h = hmax, s = rev(x), v))
}
}
#' @export
RC.colors <- diverging.colors
#' @export
GM.colors <- function(n) diverging.colors(n,2/6)
# returns a new vector of darker colors
darker <- function(col,scale=0.8) {
crgb <- col2rgb(col)/255*scale
if(T) {
# handle black specially
i <- which(apply(crgb,2,sum)==0)
if(length(i)>0) {
crgb[,i] <- rep.col(c(1,1,1)/10,length(i))
}
}
rgb(crgb[1,],crgb[2,],crgb[3,])
}
# note: pch 21 looks like pch 1, but is actually a filled circle
#plot(y,pch=21,bg=hsv(6/12,1,1))
#' Add a thin key to the top of a plot
#'
#' Similar to \code{\link{legend}} but takes up less space.
#'
#'
#' @param col a vector of colors, one for each group. Assumed black if none
#' given.
#' @param pch a vector of symbols, one for each group. If less than two
#' distinct symbols are given, no symbols are put in the key.
#' @param labels a character vector giving the label of each group.
#' @param breaks a numeric vector of boundaries between groups.
#' @param digits number of digits to use for \code{breaks}.
#' @param cex character expansion factor.
#' @return A thin bar is placed at the top of the plot, just inside the
#' plotting area. The bar is divided into equal-sized lengths, colored
#' according to \code{col}. If \code{pch} is given, that symbol is plotted in
#' the center of each segment. If \code{labels} is given, each label is placed
#' at the center of the corresponding segment, just above the plotting area
#' (using \code{\link{mtext}}). If \code{breaks} is given, the boundary
#' between segments is also labeled with a number.
#' @author Tom Minka
#' @export
#' @seealso \code{\link{color.plot}}
#' @examples
#'
#' data(iris)
#' y = as.numeric(iris$Species)
#' plot(Sepal.Width ~ Sepal.Length, iris,col=y,pch=y)
#' color.key(1:3,1:3,levels(iris$Species))
#'
color.key <- function(col=NULL,pch=NULL,labels=NULL,breaks=NULL,digits=2,cex=0.75*par("cex")) {
# don't draw symbols in the key if there is only one type
if(length(unique(pch)) == 1) pch <- NULL
if(is.null(col)) col <- rep(1,length(pch))
rx <- range(par("usr")[1:2])
ry <- range(par("usr")[3:4])
if(T && !is.null(pch)) {
# key is par("csi")*cex inches
ry[1] <- ry[2] - diff(ry)/par("pin")[2]*par("csi")*cex
} else {
# key is 0.06 inches high
ry[1] <- ry[2] - diff(ry)/par("pin")[2]*0.06
}
if(!is.null(labels)) n <- length(labels)
else if(!is.null(breaks)) n <- length(breaks)-1
else n <- length(col)
i <- seq(0,1,len=n+1)
x1 <- rx[1]+i[-length(i)]*diff(rx)
x2 <- rx[1]+i[-1]*diff(rx)
if(n == 0) stop("n == 0")
col <- rep(col,ceiling(n/length(col)))[1:n]
rect(x1, ry[1], x2, ry[2], col=col, border=NA)
if(!is.null(pch))
points((x1+x2)/2, rep(mean(ry),n), col=8, pch=pch, cex=0.75)
if(!is.null(labels))
mtext(labels,at=(x1+x2)/2,cex=cex)
if(!is.null(breaks)) {
x <- rx[1] + i*diff(rx)
# same as format.pretty
labels <- prettyNum(formatC(breaks,format="fg",digits=digits),big.mark=",")
#labels = format(breaks,digits=digits,trim=T)
mtext(labels[1],at=x[1],cex=cex,adj=0)
mtext(labels[n+1],at=x[n+1],cex=cex,adj=1)
mtext(labels[2:n],at=x[2:n],cex=cex,adj=0.5)
}
}
# extends r=c(min,max) by 4%, to mimic plotting limits
extend.pct <- function(r,pct=0.04) {
m <- diff(r)*pct
if(m == 0) m <- 0.432*abs(r[1])
if(m == 0) m <- 1.08
c(r[1]-m,r[2]+m)
}
extend.inches <- function(r,upper=0,lower=0) {
# extends the given range by a specified number of inches of each side.
# upper and lower are actually inches/plot.size
# constraints:
# (r.new[2] - r[2])/diff(r.new) = upper
# (r[1] - r.new[1])/diff(r.new) = lower
r.new <- r
a = lower/(1-upper)
r.new[1] <- (a*r[2] - r[1])/(a-1)
r.new[2] <- (r[2] - upper*r.new[1])/(1-upper)
r.new
}
# returns the minimum user limits for a plotting region of size pin
# which would enclose the given labels at the given positions
# with the justification adj at rotation srt.
range.text <- function(x,y,labels,cex=NULL,adj=NULL,srt=0,
pin=par("pin")) {
bad = (is.na(x) | is.na(y) | is.na(labels))
x = x[!bad]; y = y[!bad]; labels = labels[!bad]
lab.w <- strwidth(labels,"inches",cex=cex)
lab.h <- strheight(labels,"inches",cex=cex)
# bounding box of text, columns are (x1,y1)(x2,y1)(x2,y2)(x1,y2)
n <- length(labels)
ix <- seq(1,8,by=2)
iy <- seq(2,8,by=2)
hull <- array(0,c(n,8))
hull[,c(1,7,2,4)] <- rep(0,n)
hull[,c(3,5,6,8)] <- rep(1,n)
# put adj point at origin and scale
if(is.null(adj)) adj <- c(0.5,0.5)
if(length(adj) == 1) adj <- c(adj,0.5)
if(is.null(nrow(adj))) {
hull[,ix] <- (hull[,ix] - adj[1])*rep.col(lab.w,4)
hull[,iy] <- (hull[,iy] - adj[2])*rep.col(lab.h,4)
} else {
# adj is an n by 2 matrix
adj = adj[!bad,]
if(nrow(adj) != n) stop("nrow(adj) != length(x)")
hull[,ix] <- (hull[,ix] - rep.col(adj[,1],4))*rep.col(lab.w,4)
hull[,iy] <- (hull[,iy] - rep.col(adj[,2],4))*rep.col(lab.h,4)
}
# rotate
srt <- srt/180*pi
cr <- cos(srt)
sr <- sin(srt)
R <- array(c(cr,-sr,sr,cr),c(2,2))
R <- diag(4) %x% R
hull <- hull %*% R
bbox <- list()
bbox$left <- apply(hull[,ix],1,min)/pin[1]
bbox$right <- apply(hull[,ix],1,max)/pin[1]
bbox$bottom <- apply(hull[,iy],1,min)/pin[2]
bbox$top <- apply(hull[,iy],1,max)/pin[2]
# solve constraints
xmid <- mean(range(x))
ymid <- mean(range(y))
xlim <- c()
# these come from the constraints
# (x-xmin)/(xmax-xmin) + left > 0
# (x-xmin)/(xmax-xmin) + right < 1
# where xmax is fixed at 2*xmid - xmin
min1 <- min((x + 2*bbox$left*xmid)/(1+2*bbox$left))
min2 <- min((2*(1-bbox$right)*xmid - x)/(1-2*bbox$right))
xlim[1] <- min(min1,min2)
xlim[2] <- 2*xmid - xlim[1]
ylim <- c()
min1 <- min((y + 2*bbox$bottom*ymid)/(1+2*bbox$bottom))
min2 <- min((2*(1-bbox$top)*ymid - y)/(1-2*bbox$top))
ylim[1] <- min(min1,min2)
ylim[2] <- 2*ymid - ylim[1]
c(xlim,ylim)
}
range.aspect <- function(asp,lim=par("usr"),pin=par("pin")) {
# returns c(xlim,ylim), strictly bigger than lim, which satisfy asp
# simulates the effect of plot.window(asp)
xlim <- lim[1:2]
ylim <- lim[3:4]
dx <- diff(xlim)/pin[1]
dy <- diff(ylim)/pin[2]
if(dy > asp*dx) {
dx <- dy/asp*pin[1]
xlim <- mean(xlim) + c(-dx/2,dx/2)
} else {
dy <- asp*dx*pin[2]
ylim <- mean(ylim) + c(-dy/2,dy/2)
}
c(xlim,ylim)
}
default.pch <- c(1,41,4,20,35,38,2,17)
# use for the screen
sequential.pch <- c(20,41,4,1,38,35,11,19)
# use for printing
sequential.pch.paper <- c(45,41,4,1,38,35,11,19)
#' Plot subgroups as colored points
#'
#' Like \code{\link{plot}} and \code{\link{text.plot}} but colors according to
#' a third variable.
#'
#' Each (x,y) point is plotted with a color determined by \code{z}. If
#' \code{z} is a factor, each factor level is in a different color. If
#' \code{z} is numeric, then it is color-coded by assigning each quantile a
#' different color.
#'
#' Despite the name, this function can also make plots using different symbols
#' instead of colors. For example, if \code{color.palette=1} then all points
#' will be black but use different symbols.
#'
#' When \code{labels != NULL}, the result is equivalent to
#' \code{\link{text.plot}} with colors.
#'
#' If \code{key=TRUE} and there is more than one color or symbol on the plot, a
#' key is displayed at the top of the figure.
#'
#' @aliases color.plot color.plot.default
#' @param x,y numeric vectors of the same length.
#' @param z a numeric vector or factor, same length as \code{x} and \code{y}.
#' @param labels a character vector of labels, same length as \code{z}. If
#' NULL, cases are plotted as points (default).
#' @param axes If FALSE, no axes are plotted (but there may still be a key).
#' @param key If FALSE, no key is plotted.
#' @param add If FALSE, a new plot is made. If TRUE, points or labels are
#' added to the existing plot.
#' @param nlevels an integer. If \code{z} is numeric, it is color-coded using
#' this many levels. (If \code{z} is a factor, color-coding follows the factor
#' levels.)
#' @param color.palette a vector of colors, arbitrary length, or a function
#' with integer argument which generates a vector of colors (e.g.
#' \code{\link{YlGnBu.colors}}). Used if \code{col} is not specified. If
#' shorter than the number of levels, colors will be recycled and the plotting
#' symbol will change.
#' @param col a vector of colors, as in a call to \code{\link{plot}}. Used to
#' explicitly set the color of each point.
#' @param pch.palette a vector of plotting symbols, arbitrary length. If
#' \code{labels=NULL}, the plot symbol will rotate through these when there
#' aren't enough colors.
#' @param pch a vector of plotting symbols, as in a call to \code{\link{plot}}.
#' Used to explicitly set the symbol of each point.
#' @param digits the number of digits to use in the color key when \code{z} is
#' numeric.
#' @param bg the background color
#' @param mar figure margins (see \code{par}). If not specified, appropriate
#' margins will be chosen automatically, which will not necessarily match the
#' current value of \code{par("mar")}.
#' @return A plot is produced.
#' @note This function sets the figure margins permanently, so that you can
#' draw on the color plot. Unfortunately, this also means future plots will
#' use the same margins, until you change them with \code{par("mar")}.
#' @author Tom Minka
#' @seealso
#' \code{\link{color.plot.data.frame}},\code{\link{color.plot.loess}},\code{\link{color.plot.glm}},\code{\link{color.plot.knn}},\code{\link{color.plot.tree}},\code{\link{YlGnBu.colors}}
#' @examples
#'
#' # See the examples for color.plot.data.frame
#'
#' @export
color.plot <- function(object, ...) UseMethod("color.plot")
#' @export
color.plot.formula <- function(formula,data=parent.frame(),...) {
x <- model.frame.default(formula,data,na.action=na.pass)
# put response at end (my preferred ordering)
x = cbind(x[-1],x[1])
color.plot.data.frame(x,...)
}
#' Plot cases as colored points
#'
#' Like \code{\link{plot}} and \code{\link{text.plot}} but colors according to
#' the response variable.
#'
#' Calls \code{\link{color.plot.default}} with \code{x} as the first predictor
#' in \code{data}, \code{y} as the second predictor, and \code{z} as the
#' response. To get a different predictor/response breakdown than the default,
#' use \code{color.plot(formula, x, ...)}, which is shorthand for
#' \code{color.plot(model.frame(formula, x), ...)}.
#'
#' Each case is plotted with a color determined by the response. If the
#' response is a factor, each factor level is in a different color. If the
#' response is numeric, then it is color-coded by assigning each quantile a
#' different color.
#'
#' @aliases color.plot.data.frame color.plot.formula
#' @param data a data frame.
#' @param formula a formula specifying a response and two predictors from
#' \code{data}
#' @param labels If NULL, cases are plotted as points. If T, cases are plotted
#' as labels, according to \code{rownames}.
#' @param ... Extra arguments passed to \code{\link{color.plot.default}}.
#' @author Tom Minka
#' @seealso \code{\link{color.plot.default}}
#' @examples
#'
#' data(iris)
#' color.plot(iris)
#' color.plot(Species ~ Petal.Length + Petal.Width, iris)
#' #color.plot(Species ~ Petal.Length, iris)
#' #color.plot(Species ~ Petal.Length, iris,jitter=T)
#' color.plot(iris, col=1)
#' color.plot(iris, col=c(1,2))
#'
#' data(state)
#' x <- data.frame(state.x77)
#' color.plot(Murder ~ Frost + Illiteracy, x, labels=T, cex=0.5)
#'
#' @export
color.plot.data.frame <- function(x,z,zlab=NULL,xlab=NULL,ylab=NULL,labels=F,...) {
if(missing(z)) {
pred <- predictor.terms(x)
resp <- response.var(x)
z <- x[[resp]]
if(is.null(zlab)) zlab <- resp
} else {
pred <- colnames(x)
if(is.null(zlab)) zlab <- deparse(substitute(z))
}
if(length(pred) < 2) stop("must have two predictors to plot")
if(is.logical(labels)) {
labels <- if(labels) rownames(x) else NULL
}
if(is.null(xlab)) xlab <- pred[1]
if(is.null(ylab)) ylab <- pred[2]
color.plot.default(x[[pred[1]]],x[[pred[2]]],z,labels=labels,
xlab=xlab,ylab=ylab,zlab=zlab,...)
}
#' @export
color.plot.default <- function(x,y,z,labels=NULL,data=parent.frame(),
xlab=NULL,ylab=NULL,zlab=NULL,main="",
xlim=NULL,ylim=NULL,
axes=T,key=T,add=F,nlevels=4,pretty=T,
color.palette=NULL,col,
pch.palette=NULL,pch,
jitter=F,digits=2,mar,bg=NULL,
cex=par("cex"),yaxt="s",...) {
# continous responses are automatically quantized into nlevels levels, with
# colors given by col.
# jitter is only a suggestion. jittering is only done when necessary.
if(is.null(xlab)) xlab <- deparse(substitute(x))
if(is.null(ylab)) ylab <- deparse(substitute(y))
if(is.null(zlab)) zlab <- deparse(substitute(z))
x <- eval(substitute(x),data)
y <- eval(substitute(y),data)
z <- eval(substitute(z),data)
# treat character as factor
if(mode(x) == "character") x <- factor(x,levels=unique(x))
if(mode(y) == "character") y <- factor(y,levels=unique(x))
if(is.numeric(z)) {
b <- break.quantile(z,nlevels,pretty=pretty)
f <- rcut(z,b)
if(!any(is.na(z)) && any(is.na(f)))
stop("internal error: cut introduced NAs")
if(is.null(color.palette)) {
color.palette = YlGnBu.colors
#color.palette = YR.colors
# pick bg color using color.cone(c(YlGnBu.colors(8),grey(0.65)))
# don't use grey(0.6) or grey(0.7)
if(is.null(bg)) bg = grey(0.65)
}
if(is.null(pch.palette)) {
pch.palette = if(mode(color.palette) == "function") 16
else sequential.pch
}
} else {
f <- factor(z)
if(is.null(color.palette)) color.palette = default.colors
if(is.null(pch.palette)) {
pch.palette = if(mode(color.palette) == "function") 16
else default.pch
}
}
nlevels <- length(levels(f))
if(nlevels == 0) stop("0 levels in factor")
# lev.col is the color of each level
# lev.pch is the symbol of each level
# if there are too few colors, recycle them and change pch
lev <- 1:nlevels
# col is the color of the individual points
q <- as.numeric(f)
if(missing(col)) {
if(mode(color.palette) == "function")
color.palette <- color.palette(nlevels)
lev.col <- color.palette[((lev-1) %% length(color.palette))+1]
col <- lev.col[q]
} else {
lev.col <- NULL
}
if(missing(pch)) {
lev.pch <- pch.palette[(((lev-1) %/% length(color.palette)) %% length(pch.palette))+1]
pch <- lev.pch[q]
} else {
lev.pch <- NULL
}
# setup plot window
if(!add) {
if(missing(mar)) mar = auto.mar(main=zlab,axes=axes,yaxt=yaxt,key=key)
# change mar permanently so we can add things on top
par(mar=mar)
if(!is.null(bg) && bg != par("bg")) {
opar <- par(bg=bg)
on.exit(par(opar))
}
}
# xlim,ylim needed for jittering and label placement
if(add) {
xlim <- par("usr")[1:2]
ylim <- par("usr")[3:4]
} else {
if(is.null(labels)) {
if(is.null(xlim)) xlim <- range(as.numeric(x),na.rm=T)
if(is.null(ylim)) ylim <- range(as.numeric(y),na.rm=T)
if(key) {
# make room at the top of the plot for the key
if(length(unique(lev.pch)) == 1) {
a = 0.06/par("pin")[2]
} else {
a = par("csi")*cex/par("pin")[2]
}
ylim = extend.inches(ylim,a)
}
} else {
plot.new()
lab.lim <- range.text(x,y,labels,cex)
# make room for labels
if(is.null(xlim)) xlim <- lab.lim[1:2]
if(is.null(ylim)) ylim <- lab.lim[3:4]
}
}
if(jitter) {
# this test needs to be less strict. need to consider actual figure size.
if(length(levels(factor(x))) < length(x)) {
cat("jittering",xlab,"\n")
jitter1 <- (runif(length(x))-0.5)*diff(extend.pct(xlim))/100
x <- x+jitter1
}
if(length(levels(factor(y))) < length(y)) {
cat("jittering",ylab,"\n")
jitter2 <- (runif(length(y))-0.5)*diff(extend.pct(ylim))/100
y <- y+jitter2
}
}
if(add) {
if(!is.null(labels)) {
return(text(x, y, labels=labels, col=col, cex=cex, ...))
} else {
return(points(x, y, pch=pch, col=col, cex=cex, ...))
}
}
# from here on, add=F
if(!is.null(labels)) {
plot.default(x, y, xlim=xlim, ylim=ylim, type="n",
xlab=xlab,ylab=ylab,main="",axes=axes,yaxt=yaxt,...)
text(x, y, labels=labels, col=col, cex=cex, ...)
} else {
plot.default(x, y, xlim=xlim, ylim=ylim, xlab=xlab,ylab=ylab,main="",
pch=pch,col=col,axes=axes,yaxt=yaxt,cex=cex,...)
}
if(key) {
if(is.numeric(z))
color.key(lev.col,lev.pch,breaks=b,digits=digits)
else
color.key(lev.col,lev.pch,levels(f))
if(axes) title(zlab,line=1)
}
else if(axes) title(zlab)
}
# tests:
# color.plot(runif(10),runif(10),c(rep("a",5),rep("b",5)))
# color.plot(runif(10),runif(10),c(rep("a",5),rep("b",5)),rep("foo",10))
# this part same as color.plot
#'
#' Make a plot of text labels
#'
#' Like \code{\link{text}} except it creates a new plot with
#' limits chosen to make all labels visible.
#' @param x,y numeric vectors, same length
#' @param data a data frame with at least two columns
#' @param formula a formula specifying a response and predictor variable
#' @param labels character vector, same length as \code{x}
#' @param srt rotation angle as in \code{\link{text}}.
#' @param adj text justification as in \code{\link{text}}.
#' @details
#' The main job of \code{\link{text.plot}} is finding the right plot
#' limits so that all labels are visible. It does this by computing the
#' bounding box of each label (taking into account text rotation and
#' justification) and solving a system of inequalities which ensure that
#' each label fits entirely into the plot window.
#' @aliases text.plot text.plot.default text.plot.data.frame text.plot.formula
#' @author Tom Minka
#' @examples
#' data(state)
#' x <- data.frame(state.x77)
#' text_plot(x$Frost, x$HS.Grad, rownames(x))
#'
#' # same thing, using text.plot.formula
#' text_plot(HS.Grad ~ Frost, x)
#'
#' # notice how the limits change
#' text_plot(HS.Grad ~ Frost, x, srt=45)
#' text_plot(HS.Grad ~ Frost, x, srt=90)
#' @export
text_plot <- function(object,...) UseMethod("text_plot")
#' @export
text_plot.formula <- function(formula,data=parent.frame(),...) {
x <- model.frame.default(formula,data,na.action=na.pass)
text_plot.data.frame(x,...)
}
#' @export
text_plot.data.frame <- function(x,labels=TRUE,xlab,ylab,...) {
pred <- predictor.vars(x)[1]
resp <- response.var(x)
x1 <- x[[pred]]
x2 <- x[[resp]]
if(identical(labels,TRUE)) labels <- rownames(x)
if(missing(xlab)) xlab = pred
if(missing(ylab)) ylab = resp
text_plot.default(x1,x2,labels,xlab=xlab,ylab=ylab,...)
}
#' @export
text_plot.default <- function(x,y,labels=NULL,xlab,ylab,xlim=NULL,ylim=NULL,
main="",asp=NULL,move=F,
cex=par("cex"),pch=par("pch"),
adj=NULL,srt=0,axes=T,...) {
if(missing(xlab)) xlab <- deparse(substitute(x))
if(missing(ylab)) ylab <- deparse(substitute(y))
if(is.null(labels)) labels = names(x)
if(is.null(labels)) labels = names(y)
if(length(labels) != length(x)) stop("labels is wrong length")
plot.new()
lim <- range.text(x,y,labels,cex,adj,srt)
# make room for labels
if(is.null(xlim)) xlim <- lim[1:2]
if(is.null(ylim)) ylim <- lim[3:4]
if(F) {
plot(x,y,xlab=xlab,ylab=ylab,xlim=xlim,ylim=ylim,axes=axes,
main=main,...,type="n")
} else {
plot.window(xlim,ylim,asp=asp)
if(axes) { axis(1,...); axis(2,...); box() }
title(main,xlab=xlab,ylab=ylab)
}
if(move) {
points(x,y,cex=cex,pch=pch,...)
x = inchx(x)
y = inchy(y)
w = strwidth(labels,units="inch",cex=cex)
h = strheight(labels,units="inch",cex=cex)
n = length(x)
s = inch.symbol(pch=pch,cex=cex) + 0.1
w2 = c(w,rep(s,n))
h2 = c(h,rep(s,n))
x2 = c(x,x)
y2 = c(y,y)
#rect(xinch(x2-w2/2),yinch(y2-h2/2),xinch(x2+w2/2),yinch(y2+h2/2))
cost = c(rep(1,n),rep(0,n))
r = move.collisions2(x2,y2,w2,h2,cost)
#rect(xinch(r[,1]-w2/2),yinch(r[,2]-h2/2),xinch(r[,1]+w2/2),yinch(r[,2]+h2/2))
r = r[1:n,]
x = xinch(r[,1])
y = yinch(r[,2])
}
if(!is.null(nrow(adj))) {
if(nrow(adj) != length(x)) stop("nrow(adj) != length(x)")
# adj is an n by 2 matrix
for(i in 1:nrow(adj)) {
text(x[i],y[i],labels=labels[i],cex=cex,adj=drop(adj[i,]),srt=srt,...)
}
} else {
text(x,y,labels=labels,cex=cex,adj=adj,srt=srt,...)
}
}
# test: text.plot(runif(10),runif(10),rep("a",10))
#' Contour plot of a regression surface
#'
#'
#' The regression surface is evaluated at all points on a grid, clipped values
#' are set to \code{NA}, and \code{contour.plot} is used to plot the contours.
#'
#' If \code{add=FALSE}, the data is plotted on top using
#' \code{color.plot.data.frame}.
#'
#' @param object a \code{\link{loess}} object.
#' @param data data to use instead of \code{model.frame(object)}.
#' @param res resolution of the sampling grid in each direction.
#' @param fill passed to \code{\link{contour.plot}}.
#' @param add If \code{TRUE}, the contours are added to an existing plot.
#' Otherwise a new plot is created.
#' @param clip a polygon over which the surface is to be defined. Possible
#' values are \code{FALSE} (no clipping), \code{TRUE} (clip to the convex hull
#' of the data), or a matrix with two columns specifying (x,y) coordinates.
#' @param ... extra arguments to \code{\link{color.plot.data.frame}}
#' @return A plot is produced.
#' @author Tom Minka
#' @seealso \code{\link{contour.plot}}
#' @examples
#'
#' data(Housing)
#' fit = loess(Price ~ Rooms + Low.Status, Housing)
#' color.plot(fit)
#'
#' @export
color.plot.loess <- function(object,x,res=50,fill=F,add=fill,
clip=T,xlab=NULL,ylab=NULL,zlab=NULL, ...) {
if(missing(x)) x <- model.frame(object)
else x <- model.frame(terms(object),x)
resp <- response.var(object)
pred <- predictor.vars(object)
if(length(pred) < 2) stop("must have 2 predictors")
#return(plot.loess(object,x,add=add,lwd=lwd,...))
if(is.null(xlab)) xlab = pred[1]
if(is.null(ylab)) ylab = pred[2]
if(is.null(zlab)) zlab = resp
if(!add && !fill) color.plot.data.frame(x,xlab=xlab,ylab=ylab,zlab=zlab,...)
# use the par options set by color.plot.data.frame
# x is only used to get plotting range
# this is ugly, but otherwise recursively calling color.plot.data.frame is hard
if(is.null(list(...)$xlim)) {
if(add || !fill) xlim <- range(par("usr")[1:2])
else xlim <- range(x[[pred[1]]])
} else xlim <- list(...)$xlim
x1 <- seq(xlim[1],xlim[2],length=res)
if(is.null(list(...)$ylim)) {
if(add || !fill) ylim <- range(par("usr")[3:4])
else ylim <- range(x[[pred[2]]])
} else ylim <- list(...)$ylim
x2 <- seq(ylim[1],ylim[2],length=res)
xt <- expand.grid(x1,x2)
names(xt) <- pred[1:2]
z <- predict(object,xt)
if(clip != F && length(pred) >= 2) {
if(clip == T) {
i <- chull(x[pred])
clip <- x[pred][i,]
}
z[!in.polygon(clip,xt)] <- NA
}
dim(z) <- c(length(x1),length(x2))
contour.plot(x1,x2,z,fill=fill,level.data=x[[resp]],
xlab=xlab,ylab=ylab,zlab=zlab,add=(add || !fill),...)
if(!add && fill) color.plot.data.frame(x,col=1,add=T,...)
}
#' Display contours
#'
#' Create a filled or unfilled contour plot.
#'
#' This is a wrapper function for \code{\link{contour}} and
#' \code{\link{filled.contour}} whose main purpose is to provide a uniform
#' interface and provide a decent automatic choice of levels.
#'
#' If \code{equal=FALSE}, the levels are chosen according to the equal-count
#' algorithm of \code{\link{break.quantile}}.
#'
#' @param x,y locations at which the values in \code{z} are measured.
#' @param z a matrix containing the values to be plotted (NAs are allowed).
#' @param fill If \code{TRUE}, makes a filled contour plot.
#' @param levels numeric vector of levels at which to draw contour lines. The
#' next few arguments only apply to the case when \code{levels==NULL}.
#' @param nlevels The number of contour lines to draw.
#' @param level.data numeric vector to use for computing \code{levels}.
#' @param zlim The range that the levels should cover. Defaults to the range
#' of \code{level.data}.
#' @param equal If \code{TRUE}, the levels are equally spaced over \code{zlim}.
#' Otherwise they match the quantiles of \code{level.data}.
#' @param pretty If \code{TRUE}, the levels are chosen to be round numbers.
#' @param lwd width of the contour lines.
#' @param drawlabels If \code{TRUE}, the contour lines are labeled with their
#' level.
#' @param color.palette a vector of colors, or a function which takes a number
#' and returns a vector of that length.
#' @param bg the background color.
#' @param key If \code{TRUE}, a color key is drawn at the top of the plot. If
#' \code{key==2}, the key of \code{filled.contour} is used.
#' @param main the title of the plot.
#' @param ... extra arguments passed to \code{contour} or
#' \code{filled.contour}.
#' @author Tom Minka
#' @seealso \code{\link{contour}},\code{\link{filled.contour}},
#' \code{\link{color.plot.loess}}
#' @examples
#'
#' # see the examples for color.plot.loess
#'
contour.plot <- function(x,y,z,fill=F,
levels=NULL,nlevels=if(is.null(levels)) 4 else length(levels),
level.data=z,
zlim,equal=F,pretty=T,lwd=2,drawlabels=F,
color.palette=YlGnBu.colors,bg=grey(0.65),
key=T,zlab="",...) {
if(is.function(color.palette)) {
color.palette <- color.palette(nlevels)
}
if(missing(zlim)) {
zlim = range(level.data,na.rm=T)
z[z < zlim[1]] <- zlim[1]
z[z > zlim[2]] <- zlim[2]
}
if(is.null(levels)) {
if(equal)
levels <- seq(zlim[1],zlim[2],len=nlevels+1)
else {
r <- unique(level.data)
r <- r[r >= zlim[1] & r <= zlim[2]]
r <- c(r,zlim)
levels <- break.quantile(r,nlevels,pretty=pretty)
}
}
opar = par(bg=bg)
on.exit(par(opar))
if(fill) filled.contour(x,y,z,levels=levels,col=color.palette,main="",
key=if(key==2) T else F,...)
else {
color.palette <- c(color.palette,color.palette[length(color.palette)])
contour(x,y,z,col=color.palette,levels=levels,drawlabels=drawlabels,lwd=lwd,main="",...)
}
if(key && (key != 2)) {
color.key(color.palette,breaks=levels)
title(zlab,line=1)
}
else title(zlab)
}
color.plot.rtable <- function(rt,main=response.var(rt),
xlab=predictor.vars(rt)[1],
ylab=predictor.vars(rt)[2],...) {
x = as.numeric(rownames(rt))
y = as.numeric(colnames(rt))
contour.plot(x,y,rt,main=main,xlab=xlab,ylab=ylab,...)
}
color.plot.glm <- function(object,data,add=F,se=F,col=3,lwd=2,...) {
if(missing(data)) data = model.frame(object)
if(!add) color.plot.data.frame(data,...)
# use the par options set by color.plot.data.frame
w <- coef(object)
z <- log(0.75/0.25)
if(length(w) == 3) {
abline(-w[1]/w[3],-w[2]/w[3],col=col,lwd=lwd,...)
if(se) {
abline((-z-w[1])/w[3],-w[2]/w[3],col=col,lty=2,lwd=lwd,...)
abline((z-w[1])/w[3],-w[2]/w[3],col=col,lty=2,lwd=lwd,...)
}
} else {
stop("use color.plot.knn")
# two predictors with an interaction term
# boundary is an ellipse
xlim <- par("usr")[1:2]
x <- seq(xlim[1],xlim[2],length=50)
y <- (0-w[1]-w[2]*x)/(w[3]+w[4]*x)
i <- (w[3]+w[4]*x < 0)
lines(x[i],y[i],col=col,...)
lines(x[!i],y[!i],col=col,...)
if(se) {
for(q in c(-z,z)) {
y <- (q-w[1]-w[2]*x)/(w[3]+w[4]*x)
lines(x[i],y[i],col=col,lty=2,...)
lines(x[!i],y[!i],col=col,lty=2,...)
}
}
}
}
color.plot.multinom <- color.plot.glm
color.plot.logistic = color.plot.glm
# Plot classification boundaries.
# If pclass is given, the probability of being in that class is plotted.
#
# levels and col are only used if pclass != NULL
#
color.plot.knn <- function(object,x,pclass=NULL,fill=F,res=50,
levels=c(0.5),add=F,axes=T,lwd=2,
color.palette=default.colors,col=3,drawlabels=F,...) {
resp <- response.var(object)
pred <- predictor.vars(object)
if(missing(x)) x <- model.frame(object)
else if(length(pred) == 1) {
# take an unused variable from x
pred[2] = setdiff(colnames(x),c(resp,pred))[1]
fmla = formula(paste(resp,"~",paste(pred,collapse="+")))
x = model.frame(fmla,x)
} else x <- model.frame(terms(object),x)
if(!add && !fill) color.plot.data.frame(x,color.palette=color.palette,...)
# use the par options set by color.plot.data.frame
# x is only used to get plotting range
# this is ugly, but otherwise recursively calling color.plot.data.frame is hard
if(is.null(...$xlim)) {
if(add || !fill) xlim <- range(par("usr")[1:2])
else xlim <- range(x[[pred[1]]])
} else xlim <- ...$xlim
x1 <- seq(xlim[1],xlim[2],length=res)
# allow > 2 for derived features
if(length(pred) >= 2) {
if(is.null(...$ylim)) {
if(add || !fill) ylim <- range(par("usr")[3:4])
else ylim <- range(x[[pred[2]]])
} else ylim <- ...$ylim
x2 <- seq(ylim[1],ylim[2],length=res)
xt <- expand.grid(x1,x2)
names(xt) <- pred[1:2]
} else {
xt <- data.frame(x1)
}
prob.plot <- !is.null(pclass)
if(prob.plot) {
# z is the probability of a class
if(inherits(object,"glm") || inherits(object,"gam") ||
inherits(object,"nnet")) {
if(inherits(object,"glm") || inherits(object,"gam")) {
z <- predict(object,xt,type="response")
} else {
z = predict(object,xt)
}
y <- model.frame(object)[[resp]]
if(is.character(pclass)) pclass <- pmatch(pclass, levels(y))
if(pclass == 1) {
z <- 1-z
lev <- levels(y)[1]
main <- paste("Pr(",resp," = ",lev,")",sep="")
}
else {
if(pclass > length(levels(y))) stop("pclass exceeds the number of classes")
if(length(levels(y)) == 2) {
lev <- levels(y)[2]
main <- paste("Pr(",resp," = ",lev,")",sep="")
}
else {
lev <- levels(y)[1]
main <- paste("Pr(",resp," != ",lev,")",sep="")
}
}
} else {
z <- predict(object,xt,type="vector")
if(is.character(pclass)) pclass <- pmatch(pclass, colnames(z))
lev <- colnames(z)[pclass]
main <- paste("Pr(",resp," = ",lev,")",sep="")
z <- z[,pclass]
}
}
else {
# z is the class
if(inherits(object,"glm") || inherits(object,"gam")) {
p <- predict(object,xt,type="response")
y <- model.frame(object)[[resp]]
z <- factor(levels(y)[as.numeric(p > 0.5)+1], levels=levels(y))
}
else {
z <- factor(predict(object,xt,type="class"))
}
}
dim(z) <- c(length(x1),length(x2))
if(fill) {
if(prob.plot) {
filled.contour(x1,x2,z,
plot_axes={
title(main=main,xlab=pred[1],ylab=pred[2]);axis(1);axis(2);
if(!add) color.plot.data.frame(x,col=1,add=T,...)
},...)
}
else {
lev <- levels(z)
if(is.function(color.palette))
col <- color.palette(length(lev))
else
col <- color.palette
actual.lev <- levels(factor(z))
icol <- col[lev %in% actual.lev]
z <- as.numeric(z)
dim(z) <- c(length(x1),length(x2))
colorbar <- (length(col) > 1)
if(!add) {
# change mar permanently so we can add things on top
if(axes) {
mar = auto.mar()
mar[3] = if(colorbar) 2 else 1
par(mar=mar)
} else {
par(mar=c(0.05,0,0,0.1))
}
image(x1,x2,z,col=icol,main="",xlab=pred[1],ylab=pred[2])
# assume a light color scheme (else should be lighter)
dcol <- darker(col)
color.plot.data.frame(x,add=T,col=dcol,...)
}
else image(x1,x2,z,col=icol,add=T)
if(colorbar) {
color.key(col,lev)
if(!add) title(resp,line=1)
} else if(!add) title(resp,line=0)
}
}
else {
# not filled
if(prob.plot) {
cat("Plotting contour for",main,"=",levels,"\n")
contour(x1,x2,z,add=T, levels=levels,col=col,lwd=lwd,
drawlabels=drawlabels,...)
}
else {
nlevels <- length(levels(z))
z <- as.numeric(z)
dim(z) <- c(length(x1),length(x2))
levels <- 0.5+(1:(nlevels-1))
contour(x1,x2,z,add=T,levels=levels,col=col,lwd=lwd,
drawlabels=drawlabels,...)
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.