R/utility.function.R In directlabels: Direct Labels for Multicolor Plots

Documented in ahull.pointsbumpupchull.pointsdefault.ahulldraw.polygonsdraw.rectsenlarge.boxextreme.pointsfar.from.others.bordersignore.nain1boxlabel.piecesmake.tiebreakermidrangeonly.unique.valsoutside.ahulloutside.chullstatic.labelsvertical.qpxlimitsylimits

### Find the point on each curve which maximizes the distance to the
### plot border or to another curve.
far.from.others.borders <- function(all.groups,...,debug=FALSE){
group.data <- split(all.groups, all.groups$group) group.list <- list() for(groups in names(group.data)){ ## Run linear interpolation to get a set of points on which we ## could place the label (this is useful for e.g. the lasso path ## where there are only a few points plotted). one.group <- group.data[[groups]] approx.list <- with(one.group, approx(x, y)) if(debug){ with(approx.list, grid.points(x, y, default.units="cm")) } group.list[[groups]] <- data.frame( approx.list, groups, label=one.group$label[1])
}
output.list <- list()
for(group.i in seq_along(group.list)){
one.group <- group.list[[group.i]]
## From Mark Schmidt: "For the location of the boxes, I found the
## data point on the line that has the maximum distance (in the
## image coordinates) to the nearest data point on another line or
## to the image boundary."
dist.mat <- matrix(NA, length(one.group$x), 3) colnames(dist.mat) <- c("x","y","other") ## dist.mat has 3 columns: the first two are the shortest distance ## to the nearest x and y border, and the third is the shortest ## distance to another data point. for(xy in c("x", "y")){ xy.vec <- one.group[,xy] xy.mat <- rbind(xy.vec, xy.vec) lim.fun <- get(sprintf("%slimits", xy)) diff.mat <- xy.mat - lim.fun() dist.mat[,xy] <- apply(abs(diff.mat), 2, min) } other.groups <- group.list[-group.i] other.df <- do.call(rbind, other.groups) for(row.i in 1:nrow(dist.mat)){ r <- one.group[row.i,] other.dist <- with(other.df, (x-r$x)^2 + (y-r$y)^2) dist.mat[row.i,"other"] <- sqrt(min(other.dist)) } shortest.dist <- apply(dist.mat, 1, min) picked <- calc.boxes(one.group[which.max(shortest.dist),]) ## Mark's label rotation: "For the angle, I computed the slope ## between neighboring data points (which isn't ideal for noisy ## data, it should probably be based on a smoothed estimate)." left <- max(picked$left, min(one.group$x)) right <- min(picked$right, max(one.group$x)) neighbors <- approx(one.group$x, one.group$y, c(left, right)) slope <- with(neighbors, (y[2]-y[1])/(x[2]-x[1])) picked$rot <- 180*atan(slope)/pi
output.list[[group.i]] <- picked
}
output <- do.call(rbind, output.list)
##browser()
output
}

label.endpoints <- function
### Make a Positioning Method that labels a certain x value.
(FUN,
### FUN(d$x) should return an index of which point to label. for ### example you can use which.min or which.max. HJUST ### hjust of the labels. ){ stopifnot(is.function(FUN)) stopifnot(length(HJUST)==1) stopifnot(is.numeric(HJUST)) stopifnot(is.finite(HJUST)) function(d,...)gapply(d,function(d,...){ i <- FUN(d$x)==d$x if(length(i)==0){ data.frame() }else{ sub.df <- d[i,] if(nrow(sub.df) > 1){ y.target <- mean(range(sub.df$y))
sub.df <- sub.df[1,]
sub.df$y <- y.target } sub.df$hjust <- HJUST
sub.df$vjust <- 0.5 sub.df } }) ### A Positioning Method like first.points or last.points. } dl.combine <- structure(function # Combine output of several methods ### Apply several Positioning methods to the original data frame. (... ### Several Positioning Methods. ){ FUNS <- list(...) pf <- function(d,...){ dfs <- lapply(FUNS,apply.method,d,...) res <- data.frame() for(df in dfs){ ## if cex is undefined, we will get NAs which will not be ## plotted. if(!"cex"%in%names(df)){ df$cex <- 1
}

## we need to do merge to keep all the columns around.
if(nrow(res))res <- merge(df,res,all=TRUE)
else res <- df
}
res
}
pf
### A Positioning Method that returns the combined data frame after
### applying each specified Positioning Method.
},ex=function(){

## Simple example: label the start and endpoints
library(nlme)
library(lattice)
ratplot <- xyplot(
weight~Time|Diet,BodyWeight,groups=Rat,type='l',layout=c(3,1))
both <- dl.combine("first.points","last.points")
rat.both <- direct.label(ratplot,"both")
print(rat.both)

## same as repeated call to direct.label:
rat.repeated <-
direct.label(direct.label(ratplot,"last.points"),"first.points")
print(rat.repeated)

## same with ggplot2:
if(require(ggplot2)){
rp2 <- qplot(
Time,weight,data=BodyWeight,geom="line",facets=.~Diet,colour=Rat)
print(direct.label(direct.label(rp2,"last.points"),"first.points"))
print(direct.label(rp2,"both"))
}

## more complex example: first here is a function for computing the
## lasso path.
mylars <- function
## Least angle regression algorithm for calculating lasso solutions.
(x,
## Matrix of predictor variables.
y,
## Vector of responses.
epsilon=1e-6
## If correlation < epsilon, we are done.
){
xscale <- scale(x) # need to work with standardized variables
b <- rep(0,ncol(x))# coef vector starts at 0
names(b) <- colnames(x)
ycor <- apply(xscale,2,function(xj)sum(xj*y))
j <- which.max(ycor) # variables in active set, starts with most correlated
alpha.total <- 0
out <- data.frame()
while(1){## lar loop
xak <- xscale[,j] # current variables
r <- y-xscale%*%b # current residual
## direction of parameter evolution
delta <- solve(t(xak)%*%xak)%*%t(xak)%*%r
## Current correlations (actually dot product)
intercept <- apply(xscale,2,function(xk)sum(r*xk))
## current rate of change of correlations
z <- xak%*%delta
slope <- apply(xscale,2,function(xk)-sum(z*xk))
## store current values of parameters and correlation
out <- rbind(out,data.frame(variable=colnames(x),
coef=b,
corr=abs(intercept),
alpha=alpha.total,
arclength=sum(abs(b)),
coef.unscaled=b/attr(xscale,"scaled:scale")))
if(sum(abs(intercept)) < epsilon)#corr==0 so we are done
return(transform(out,s=arclength/max(arclength)))
## If there are more variables we can enter into the regression,
## then see which one will cross the highest correlation line
## first, and record the alpha value of where the lines cross.
d <- data.frame(slope,intercept)
d[d$intercept<0,] <- d[d$intercept<0,]*-1
d0 <- data.frame(d[j[1],])# highest correlation line
d2 <- data.frame(rbind(d,-d),variable=names(slope))#reflected lines
## Calculation of alpha for where lines cross for each variable
d2$alpha <- (d0$intercept-d2$intercept)/(d2$slope-d0$slope) subd <- d2[(!d2$variable%in%colnames(x)[j])&d2$alpha>epsilon,] subd <- subd[which.min(subd$alpha),]
nextvar <- subd$variable alpha <- if(nrow(subd))subd$alpha else 1
## If one of the coefficients would hit 0 at a smaller alpha
## value, take it out of the regression and continue.
hit0 <- xor(b[j]>0,delta>0)&b[j]!=0
alpha0 <- -b[j][hit0]/delta[hit0]
takeout <- length(alpha0)&&min(alpha0) < alpha
if(takeout){
i <- which.min(alpha0)
alpha <- alpha0[i]
}
b[j] <- b[j]+alpha*delta ## evolve parameters
alpha.total <- alpha.total+alpha
## add or remove a variable from the active set
j <- if(takeout)j[j!=which(names(i)==colnames(x))]
else c(j,which(nextvar==colnames(x)))
}
}

## Calculate lasso path, plot labels at two points: (1) where the
## variable enters the path, and (2) at the end of the path.
if(require(lars)){
data(diabetes,envir=environment())
dres <- with(diabetes,mylars(x,y))
P <- xyplot(coef~arclength,dres,groups=variable,type="l")
mylasso <- dl.combine("lasso.labels", "last.qp")
plot(direct.label(P,"mylasso"))
}

})

gapply.fun <- structure(function # Direct label groups independently
### Makes a function you can use to specify the location of each group
### independently.
(expr
### Expression that takes a subset of the d data frame, with data from
### only a single group, and returns the direct label position.
){
foo <- substitute(expr)
f <- function(d,...)eval(foo)
src <- paste("gapply.fun(",paste(deparse(foo),collapse="\n"),")",sep="")
pf <- structure(function(d,...)gapply(d,f,...),"source"=src)
pf
### A Positioning Function.
},ex=function(){
complicated <- list(dl.trans(x=x+10),
gapply.fun(d[-2,]),
rot=c(30,180))
library(lattice)
})

dl.trans <- structure(function # Direct label data transform
### Make a function that transforms the data. This is for conveniently
### making a function that calls transform on the data frame, with the
### arguments provided. See examples.
(...
### Arguments to pass to transform.
){
L <- as.list(match.call())[-1]
pf <- function(d,...)do.call("transform",c(list(d),L))
pf
### A Positioning Function.
},ex=function(){
complicated <- list(dl.trans(x=x+10),
gapply.fun(d[-2,]),
rot=c(30,180))
library(lattice)
})

dl.move <- structure(function # Manually move a direct label
### Sometimes there is 1 label that is placed oddly by another
### Positioning Function. This function can be used to manually place
### that label in a good spot.
(group,
### Group to change.
x,
### Horizontal position of the new label.
y,
### Vertical position of the new label. If missing(y) and !missing(x)
### then we will calculate a new y value using linear interpolation.
...
### Variables to change for the specified group
){
L <- list(...)
pos <- list()
if(!missing(x))pos$x <- x if(!missing(y))pos$y <- y
pf <- function(d,...,axes2native){
native <- axes2native(do.call(data.frame,pos))
## first convert user-specified axes units to cm
for(var in names(pos)){
u <- unit(native[[var]],"native")
L[[var]] <- convertUnit(u,"cm",var,"location",var,"location")
}
v <- d$groups==group for(N in names(L)) d[v,N] <- L[[N]] ## maybe generalize this to be symmetric on x and y one day? if("x" %in% names(L) && (!"y" %in% names(L))){ orig <- attr(d,"orig.data") orig <- orig[orig$label==group,]
## do linear interpolation to find a good y-value
f <- with(orig,approxfun(x,y))
d[v,"y"] <- f(L$x) } d } pf ### A Positioning Function that moves a label into a good spot. },ex=function(){ if(require(ggplot2)){ library(lattice) scatter <- xyplot(jitter(cty)~jitter(hwy),mpg,groups=class,aspect=1) dlcompare(list(scatter), list("extreme.grid", +dl.move=list(extreme.grid,dl.move("suv",15,15)))) p <- qplot(log10(gamma),rate,data=svmtrain,group=data,colour=data, geom="line",facets=replicate~nu) adjust.kif <- dl.move("KIF11",-0.9,hjust=1,vjust=1) dlcompare(list(p+xlim(-8,7)), list("last.points", +dl.move=list(last.points,adjust.kif))) } }) ### Jitter the label positions. dl.jitter <- dl.trans(x=jitter(x),y=jitter(y)) calc.boxes <- function ### Calculate boxes around labels, for collision detection. (d, debug=FALSE, ... ){ vp <- current.viewport() convert <- function(str.prop, worh=str.prop){ conv <- get(paste("convert",worh,sep="")) stri <- get(paste("string", str.prop, sep="")) as.numeric(sapply(seq_along(d$groups),function(i){
if("cex"%in%names(d))vp$gp <- gpar(cex=d$cex[i])
pushViewport(vp)
if(debug)grid.rect() ##highlight current viewport
cm <- conv(stri(as.character(d$label[i])),"cm") popViewport() cm })) } ## abs since we have a weird bug with ggplot2 sometimes d$w <- abs(convert("Width"))
d$h <- abs(convert("Height")) d$descent <- abs(convert("Descent", "Height"))
calc.borders(d)
}

### Calculate big boxes around the means of each cluster.
big.boxes <- list("get.means","calc.boxes","enlarge.box")

### Point halfway between the min and max
midrange <- function(x){
r <- range(x)
(r[2]-r[1])/2+r[1]
}

### Point in the middle of the min and max for each group.
visualcenter <- gapply.fun(dl.summarize(d,x=midrange(x),y=midrange(y)))

### Positioning Function for the mean of each cluster of points.
get.means <-
gapply.fun(dl.summarize(d,x=mean(x),y=mean(y)))

calc.borders <- function
### Calculate bounding box based on newly calculated width and height.
(d,
### Data frame of point labels, with new widths and heights in the w
### and h columns.
...
### ignored.
){
for(just in c("hjust","vjust")){
if(!just %in% names(d)){
d[,just] <- 0.5
}
}
if(!"descent" %in% names(d)){
d$descent <- 0 } d$top <- d$y+(1-d$vjust)*d$h d$bottom <- d$y-d$vjust*d$h - d$descent
d$right <- d$x+(1-d$hjust)*d$w
d$left <- d$x-d$hjust*d$w
d
}

polygon.method <- function
### Make a Positioning Method that places non-overlapping speech
### polygons at the first or last points.
(top.bottom.left.right,
### Character string indicating what side of the plot to label.
offset.cm=0.1,
### Offset from the polygon to the most extreme data point.
custom.colors=NULL
### Positioning method applied just before draw.polygons, can set
### box.color and text.color for custom colors.
){
if(is.null(custom.colors)){
custom.colors <- gapply.fun({
rgb.mat <- col2rgb(d[["colour"]])
d$text.color <- with(data.frame(t(rgb.mat)), { ifelse( (0.3 * red) + (0.59 * green) + (0.11 * blue)/255 < 0.5, "white", "black") }) d }) } opposite.side <- c( left="right", right="left", top="bottom", bottom="top")[[top.bottom.left.right]] direction <- if( top.bottom.left.right %in% c("bottom", "left") ) -1 else 1 min.or.max <- if( top.bottom.left.right %in% c("top", "right") ) max else min if(top.bottom.left.right %in% c("left", "right")){ min.or.max.xy <- "x" qp.target <- "y" qp.max <- "top" qp.min <- "bottom" padding.h.factor <- 2 padding.w.factor <- 1 limits.fun <- ylimits reduce.method <- "reduce.cex.lr" }else{ min.or.max.xy <- "y" qp.target <- "x" qp.max <- "right" qp.min <- "left" padding.h.factor <- 1 padding.w.factor <- 2 limits.fun <- xlimits reduce.method <- "reduce.cex.tb" } list( paste0(top.bottom.left.right, ".points"), function(d,...){ ## set the end of the speech polygon to the original data point. for(xy in c("x", "y")){ extra.coord <- sprintf(# e.g. left.x "%s.%s", opposite.side, xy) d[[extra.coord]] <- d[[xy]] } ## set the speech polygon position to the min or max of all ## label positions. e.g. max d[[min.or.max.xy]] <- min.or.max(d[[min.or.max.xy]]) + offset.cm*direction d }, "calc.boxes", reduce.method, function(d, ...){ d$h <- d$h + padding.cm * padding.h.factor d$w <- d$w + padding.cm * padding.w.factor d }, "calc.borders", qp.labels( qp.target, qp.min, qp.max, make.tiebreaker(min.or.max.xy, qp.target), limits.fun), "calc.borders", custom.colors, "draw.polygons") } ### Draw polygons around label positions. draw.polygons <- function(d,...){ for(side in c("left", "right", "top", "bottom")){ for(xy in c("x", "y")){ col.name <- paste0(side, ".", xy) if(!col.name %in% names(d)){ d[[col.name]] <- NA } } } if(! "box.color" %in% names(d)){ d$box.color <- "black"
}
if(! "text.color" %in% names(d)){
d$text.color <- "white" } for(i in 1:nrow(d))with(d[i,], { L <- list( x=c(left.x, left, top.x, right, right.x, right, bottom.x, left), y=c(left.y, top, top.y, top, right.y, bottom, bottom.y, bottom)) for(xy.name in names(L)){ xy <- L[[xy.name]] L[[xy.name]] <- xy[!is.na(xy)] } grid::grid.polygon( L$x, L$y, default.units="cm", gp=grid::gpar(col=box.color, fill=colour), name="directlabels.draw.polygon" ) }) d$colour <- d$text.color d } ### Positioning Function that draws boxes around label positions. Need ### to have previously called calc.boxes. Does not edit the data ### frame. draw.rects <- function(d,...){ if(is.null(d$box.color))d$box.color <- "black" if(is.null(d$fill))d$fill <- "white" for(i in 1:nrow(d)){ with(d[i,], grid.rect( gp = gpar(col = box.color, fill = fill), vp = viewport(x, y, w, h, "cm", c(hjust, vjust), angle=rot), name="directlabels.draw.rects" )) } d } ### Sequentially bump labels up, starting from the bottom, if they ### collide with the label underneath. bumpup <- function(d,...){ ## If there is only 1, then there is no collision detection to do. if(nrow(d) == 1)return(d) d <- calc.boxes(d)[order(d$y),]
"%between%" <- function(v,lims)lims[1]<v&v<lims[2]
obox <- function(x,y){
tocheck <- with(x,c(left,(right-left)/2+left,right))
tocheck %between% with(y,c(left,right))
}
for(i in 2:nrow(d)){
dif <- d$bottom[i]-d$top[i-1]
## here we are trying to test if box i can possibly collide with
## the box below it! Originally we checked if the bottom points of
## this box fall in the box below it, but this causes problems
## since we are reassigning box positions. If all boxes start at
## the same place, 2 will get moved up, 3 will not since its
## bottom points are no longer inside box 2. Solution: Look at box
## left and right limits and see if they collide!

## GOTCHA: If all the boxes are exactly the same size, on top of
## each other, then if we only examine left and right points of
## each box, none of the boxes will be detected as
## overlapping. One way to fix this is change > to >= in %between%
## but this is a bad idea since you can have boxes right next to
## each other that we don't want to move, that would be detected
## as overlapping. Solution: use the midpoint of the box as well!
overlap <- c(obox(d[i,],d[i-1,]),obox(d[i-1,],d[i,]))
if(dif < 0 && any(overlap)){
d$bottom[i] <- d$bottom[i]-dif
d$top[i] <- d$top[i]-dif
d$y[i] <- d$y[i]-dif
}
}
d
}

### Remove rows for which either x or y is NA
ignore.na <- function(d,...){
not.na <- is.finite(d$x) if("y"%in% names(d)){ not.na <- not.na & is.finite(d$y)
}
d[not.na,]
}

qp.labels <- structure(function# Make a Positioning Method for non-overlapping lineplot labels
### Use a QP solver to find the best places to put the points on a
### line, subject to the constraint that they should not overlap.
(target.var,
### Variable name of the label target.
lower.var,
### Variable name of the lower limit of each label bounding box.
upper.var,
### Variable name of the upper limit of each label bounding box.
order.labels=function(d)order(d[,target.var]),
### Function that takes the data.frame of labels and returns an
### ordering, like from the order function. That ordering will be used
### to reorder the rows. This is useful to e.g. break ties when two
### groups have exactly the same value at the endpoint near the label.
limits=NULL
### Function that takes the data.frame of labels an returns a numeric
### vector of length 2. If finite, these values will be used to add
### constraints to the QP: limits[1] is the lower limit for the first
### label's lower.var, and limits[2] is the upper limit for the last
### labels's upper.var. Or NULL for no limits.
){
## Reality checks. These also have the side effect of forcing
## evaluation of all the arguments in the returned closure.
stopifnot(is.function(order.labels))
essential <- list(target.var,upper.var,lower.var)
for(v in essential){
stopifnot(is.character(v))
stopifnot(length(v)==1)
}
stopifnot(is.function(limits)||is.null(limits))

function(d,...){

## If there is only 1 label, there is no collision detection to
## do, so just return it.
if(nrow(d)==1)return(d)

##browser()

## Reality checks.
for(v in essential){
if(! v %in% names(d)){
stop("need to have calculated ",v)
}
}

## sorts data so that target_1 <= target_2 <= ... <= target_n.
d <- d[order.labels(d),]

## check limits to see if there is enough space, given specified
## cex.
if(is.function(limits)){
l <- limits(d)
stopifnot(is.numeric(l))
stopifnot(length(l)==2)
stopifnot(l[1]<l[2])

h.available <- l[2] - l[1]
h <- d[,upper.var]-d[,lower.var]
h.occupied <- sum(h)
if(h.occupied > h.available){ ## then the feasible set is empty.
## total hack:
cex <- h.available / h.occupied  * 0.9
if("cex" %in% names(d)){
d$cex <- d$cex * cex
}else{
d$cex <- cex } d <- calc.boxes(d) } } ## These are the standard form matrices described in the ## directlabels poster. target <- d[,target.var] k <- nrow(d) D <- diag(rep(1,k)) Ik <- diag(rep(1,k-1)) A <- rbind(0,Ik)-rbind(Ik,0) y.up <- d[,upper.var] y.lo <- d[,lower.var] b0 <- (y.up-target)[-k] + (target-y.lo)[-1] ## limit constraints. if(is.function(limits)){ if(is.finite(l[1])){ c.vec <- rep(0,k) c.vec[1] <- 1 A <- cbind(A,c.vec) b0 <- c(b0,l[1]+target[1]-y.lo[1]) } if(is.finite(l[2])){ c.vec <- rep(0,k) c.vec[k] <- -1 A <- cbind(A,c.vec) b0 <- c(b0,y.up[k]-target[k]-l[2]) } } ##print(A) ##print(b0) ##browser() sol <- solve.QP(D,target,A,b0) d[,target.var] <- sol$solution
d
}
### Positioning Method that adjusts target.var so there is no overlap
### of the label bounding boxes, as specified by upper.var and
### lower.var.
},ex=function(){
SegCost$error <- factor(SegCost$error,c("FP","FN","E","I"))
if(require(ggplot2)){
fp.fn.colors <- c(FP="skyblue",FN="#E41A1C",I="black",E="black")
fp.fn.sizes <- c(FP=2.5,FN=2.5,I=1,E=1)
fp.fn.linetypes <- c(FP="solid",FN="solid",I="dashed",E="solid")
err.df <- subset(SegCost,type!="Signal")

kplot <- ggplot(err.df,aes(segments,cost))+
geom_line(aes(colour=error,size=error,linetype=error))+
facet_grid(type~bases.per.probe)+
scale_linetype_manual(values=fp.fn.linetypes)+
scale_colour_manual(values=fp.fn.colors)+
scale_size_manual(values=fp.fn.sizes)+
scale_x_continuous(limits=c(0,20),breaks=c(1,7,20),minor_breaks=NULL)+
theme_bw()+theme(panel.margin=grid::unit(0,"lines"))

## The usual ggplot without direct labels.
print(kplot)

## Get rid of legend for direct labels.
no.leg <- kplot+guides(colour="none",linetype="none",size="none")

## Default direct labels.
direct.label(no.leg)

## Explore several options for tiebreaking and limits. First let's
## make a qp.labels Positioning Method that does not tiebreak.
no.tiebreak <- list("first.points",
"calc.boxes",
qp.labels("y","bottom","top"))
direct.label(no.leg, no.tiebreak)

## Look at the weird labels in the upper left panel. The E curve is
## above the FN curve, but the labels are the opposite! This is
## because they have the same y value on the first points, which are
## the targets for qp.labels. We need to tiebreak.
qp.break <- qp.labels("y","bottom","top",make.tiebreaker("x","y"))
tiebreak <- list("first.points",
"calc.boxes",
"qp.break")
direct.label(no.leg, tiebreak)

## Enlarge the text size and spacing.
tiebreak.big <- list("first.points",
cex=2,
"calc.boxes",
dl.trans(h=1.25*h),
"calc.borders",
"qp.break")
direct.label(no.leg, tiebreak.big)

## Even on my big monitor, the FP runs off the bottom of the screen
## in the top panels. To avoid that you can specify a limits
## function.

## Below, the ylimits function uses the limits of each panel, so
## labels appear inside the plot region. Also, if you resize your
## window so that it is small, you can see that the text size of the
## labels is decreased until they all fit in the plotting region.
qp.limited <-  qp.labels("y","bottom","top",make.tiebreaker("x","y"),ylimits)
tiebreak.lim <- list("first.points",
cex=2,
"calc.boxes",
dl.trans(h=1.25*h),
"calc.borders",
"qp.limited")
direct.label(no.leg, tiebreak.lim)
}
})

### Make text bounding box larger by some amount.
enlarge.box <- function(d,...){
if(!"h"%in%names(d))stop("need to have already calculated height and width.")
calc.borders(within(d,{
w <- w+h
h <- h+h
}))
}

in1which <- function
### Calculate which points fall in a box.
(p,
### data frame of points with columns x and y and many rows.
box
### data frame of 1 row with columns left right top bottom.
){
p$x>=box$left & p$x<=box$right & p$y<=box$top & p$y>=box$bottom
}

### Calculate how many points fall in a box.
in1box <- function(p,box)sum(in1which(p,box))

### Make a Positioning Method that will, for every piece, select
### points and assign a vjust value.
label.pieces <- function(FUN,VJUST){
function(d,...){
processed <- gapply(d,function(d,...)d[FUN(d$y),],groups="piece") transform(processed,hjust=0.5,vjust=VJUST) } } inside <- function ### Calculate for each box how many points are inside. (boxes, ### Data frame of box descriptions, each row is 1 box, need columns ### left right top bottom. points ### Data frame of points, each row is 1 point, need columns x y. ){ sapply(1:nrow(boxes),function(i)in1box(points,boxes[i,])) ### Vector of point counts for each box. } dl.summarize <- function ### summarize which preserves important columns for direct labels. (OLD, ### data frame ... ){ rownames(OLD) <- NULL NEW <- unique(transform(OLD,...)) to.copy <- names(OLD)[!names(OLD)%in%names(NEW)] for(N in to.copy) NEW[,N] <- OLD[,N] NEW } gapply <- function ### apply a Positioning Method to every group. works like ddply from ### plyr package, but the grouping column is always called groups, and ### the Positioning Method is not necessarily a function (but can be). (d, ### data frame with column groups. method, ### Positioning Method to apply to every group separately. ..., ### additional arguments, passed to Positioning Methods. groups="groups" ### can also be useful for piece column. ){ stopifnot(is.data.frame(d)) dfs <- split(d,as.character(d[[groups]])) f <- function(d,...){ res <- apply.method(method,d,columns.to.check=c("x","y"),...) if(nrow(res)){ res[[groups]] <- d[[groups]][1] res[["label"]] <- d[["label"]][1] } res } results <- lapply(dfs,f,...) if(any(!sapply(results,is.data.frame))){ print(results) stop("Positioning Method did not return data.frame") } do.call(rbind,results) ### data frame of results after applying FUN to each group in d. } ### Label the points furthest from the middle for each group. extreme.points <- function(d,...){ d$dist.from.center <- sqrt((d$x-midrange(d$x))^2+(d$y-midrange(d$y))^2)
gapply(d,function(d,...)d[which.max(d$dist.from.center),]) } edges.to.outside <- function ### Given a list of edges from the convex or alpha hull, and a list of ### cluster centers, calculate a point near to each cluster on the ### outside of the hull. (edges,centers,debug=FALSE,...){ if(debug){ with(centers,grid.points( x,y,pch="+",default.units="cm", name="directlabels.points.edges.to.outside" )) with(edges,grid.segments( x1,y1,x2,y2,default.units="cm", name="directlabels.segments.edges.to.outside" )) } closepts <- gapply(centers,project.onto.segments,edges,debug=debug,...) closepts$vjust <- ifelse(closepts$y-centers$y>0,0,1)
closepts$hjust <- ifelse(closepts$x-centers$x>0,0,1) r <- apply.method("big.boxes",closepts) r$x <- (r$right-r$left)/2+r$left r$y <- (r$top-r$bottom)/2+r$bottom r$hjust <- 0.5
r$vjust <- 0.5 r } ### Calculate closest point on the alpha hull with size of the boxes, ### and put it outside that point. outside.ahull <- function(d,...){ edges.to.outside(ahull.points(d),visualcenter(d),...) } ### Calculate closest point on the convex hull and put it outside that ### point. Assume d is the center for each point cloud and then use ### orig.data to calculate hull. outside.chull <- function(d,...){ edges.to.outside(chull.points(d),visualcenter(d),...) } project.onto.segments <- function ### Given a point and a set of line segments representing a convex or ### alpha hull, calculate the closest point on the segments. (m, ### m is 1 row, a center of a point cloud, we need to find the ### distance to the closest point on each segment of the convex ### hull. h, ### Data frame describing the line segments of the convex or alpha ### hull. debug=FALSE, ... ### ignored ){ h$s <- (h$y2-h$y1)/(h$x2-h$x1)
## the closest point on the line formed by expanding this line
## segment (this expression is calculated by finding the minimum
## of the distance function).
h$xstar <- (m$x + m$y*h$s + h$x1*h$s^2 - h$s*h$y1)/(h$s^2+1) h$minval <- apply(cbind(h$x1,h$x2),1,min)
h$maxval <- apply(cbind(h$x1,h$x2),1,max) ## xopt is the closest point on the line segment h$xopt <- ifelse(h$xstar<h$minval,h$minval, ifelse(h$xstar>h$maxval,h$maxval,h$xstar)) h$yopt <- h$s*(h$xopt-h$x1)+h$y1
## distance to each point on line segment from the center
h$d <- (m$x-h$xopt)^2+(m$y-h$yopt)^2 i <- which.min(h$d)
result <- with(h[i,],data.frame(x=xopt,y=yopt))
if(debug){
grid.segments(
m$x,m$y,result$x,result$y,default.units="cm",
name="directlabels.segments.project.onto.segments"
)
}
result
}

### Make a Positioning Function from a set of points on a vertical
### line that will be spaced out using qp.labels.
vertical.qp <- function(M){
avoid.collisions <-
qp.labels("y","bottom","top",make.tiebreaker("x","y"),ylimits)
list(M,"reduce.cex.lr",avoid.collisions)
}

### Make a tiebreaker function that can be used with qp.labels.
make.tiebreaker <- function(x.var,tiebreak.var){
force(x.var)
force(tiebreak.var)
function(d,...){
orig <- attr(d,"orig.data")
xvals <- unique(orig[,x.var])
x <- unique(d[,x.var])
if(length(x)>1){
stop("labels are not aligned")
}
xvals <- xvals[order(abs(xvals-x))]
group.dfs <- split(orig,orig$groups) m <- do.call(cbind,lapply(d$groups,function(g){
df <- group.dfs[[as.character(g)]]
group.x <- df[,x.var]
group.y <- df[,tiebreak.var]
all.unique <- length(unique(group.x)) == length(group.x)
## approx gives the following error if we only have one point:
## need at least two non-NA values to interpolate
x.not.missing <- !is.na(group.x)
y.not.missing <- !is.na(group.y)
not.missing <- sum(x.not.missing & y.not.missing)
if(all.unique && 1 < not.missing){
## rule=2 means to use the most extreme value instead of the
## default NA, for any points that are outside the range -
## this is required to get a good ordering in some cases.
approx(group.x, group.y, xvals, rule=2)$y }else{ group.y } })) ## useful for debugging: ##print(m) L <- lapply(1:nrow(m),function(i)m[i,]) do.call(order,L) } } ### Calculate the default alpha parameter for ashape based on the ### average size of label boxes. default.ahull <- function(d,...){ labels <- apply.method("big.boxes",d,...) mean(unlist(labels[,c("w","h")])) } ### Calculate the points on the ashape. ahull.points <- function(d,...,ahull=default.ahull(d)){ xy <- unique(d[,c("x","y")]) as <- alphahull::ashape(xy,alpha = ahull) as.data.frame(as$edges)
}

### Calculate the points on the convex hull.
chull.points <- function(d,...){
bpts <- d[with(d,chull(x,y)),]
r <- data.frame(i1=1:nrow(bpts),i2=c(2:nrow(bpts),1))
r$x1 <- bpts$x[r$i1] r$y1 <- bpts$y[r$i1]
r$x2 <- bpts$x[r$i2] r$y2 <- bpts$y[r$i2]
r
}

check.for.columns <- function
### Stop if a data.frame does not have some columns.
(d,
### data.frame to check.
must.have
### column names to check.
){
stopifnot(is.character(must.have))
for(N in must.have){
if(! N %in% names(d)){
stop("data must have a column named ",N)
}
}
}

apply.method <- function # Apply a Positioning Method
### Run a Positioning Method list on a given data set. This function
### contains all the logic for parsing a Positioning Method and
### sequentially applying its elements to the input data to obtain the
### label positions.
(method,
### Direct labeling Positioning Method. Starting from the data frame
### of points to plot for the panel, the elements of the Positioning
### Method list are applied in sequence, and then each row of the
### resulting data frame is used to draw a direct label. The
### elements of a Positioning Method list can be
### \itemize{
### \item a Positioning Function is any function(d,...) which takes a
### data.frame d with columns x,y,groups and returns another
### data.frame representing the positions of the desired direct
### labels. For a description of all the columns that are interpreted
### for drawing direct labels, see \code{\link{drawDetails.dlgrob}}.
### For example, maxvar.points is a Positioning Function that returns
### a data.frame with columns x,y,groups,hjust,vjust.
### \item a character vector of length 1 is treated as the name of an
### R object. For example, specifying "maxvar.points" means to look up
### the variable called maxvar.points and use that. Using the name of
### a Positioning Function is preferable to specifying the Positioning
### Function itself, since then the name is visible in the Positioning
### Method list, which is more interpretable when debugging.
### \item a named list element is used to add or update variables in
### the data.frame of direct labels to plot. For example
### list("first.points",cex=1.5) means take only the first points of
### every group and then set the cex column to 1.5.
### \item an element of a Positioning Method list can be another
### Positioning Method list, in which case the elements of the inner
### list are applied.
### }
d,
### Data frame to which we apply the Positioning Method. The x and y
### columns should be in centimeters (cm), so that Positioning Methods
### can easily calculate the L2/Euclidean/visual distance between
### pairs of points.
columns.to.check=c("x","y","groups","label"),
### After applying each Positioning Method list element, we check for
### the presence of these columns, and if not found we stop with an
### error.
...,
### Named arguments, passed to Positioning Functions.
debug=FALSE
### If TRUE, print each Positioning Method list elmenent and the
### direct label data.frame that results from its evaluation.
){
attr(d,"orig.data") <- d ##DONT DELETE: if the first Positioning
##Method needs orig.data, this needs to be
##here!
check.for.columns(d,columns.to.check)
if(!is.list(method))method <- list(method)
isconst <- function(){
m.var <- names(method)[1]
!(is.null(m.var)||m.var=="")
}
islist <- function()is.list(method[[1]])
isref <- function()(!isconst())&&is.character(method[[1]])
while(length(method)){
if(debug)print(method[1])##not [[1]] --- named items!
##browser()
## Resolve any names or nested lists
while(islist()||isref()){
if(islist()){
method <- c(method[[1]],method[-1])
}else{ #must be character -> get the fun(s)
if(length(method[[1]])>1){
warning("using first element of character vector")
method[[1]] <- method[[1]][1]
}
method <- c(get(method[[1]]),method[-1])
}
}
if(isconst())
d[[names(method)[1]]] <- method[[1]]
else{ #should be a Positioning Function
old <- d
group.dfs <- split(d,d$groups) group.specific <- lapply(group.dfs,only.unique.vals) to.restore <- Reduce(intersect,lapply(group.specific,names)) d <- method[[1]](d,debug=debug,...) if(length(d)==0){#NULL or list() return(data.frame()) }else{ check.for.columns(d,columns.to.check) if("groups" %in% names(d)){ ## do not restore if they are present in the returned list! to.restore <- to.restore[!to.restore %in% names(d)] for(N in to.restore){ d[[N]] <- NA group.vec <- paste(unique(d$groups))
for(g in group.vec){
old.val <- group.specific[[g]][,N]
if(is.factor(old.val))old.val <- paste(old.val)
d[d$groups==g,N] <- old.val } } } } attr(d,"orig.data") <- if(is.null(attr(old,"orig.data")))old else attr(old,"orig.data") } if(debug){ print(d) } method <- method[-1] } d ### The final data frame returned after applying all of the items in ### the Positioning Method list, with x and y in units of cm. } ### Create a 1-row data.frame consisting of only the columns for which ### there is only 1 unique value. only.unique.vals <- function(d,...){ unique.vals <- lapply(d,unique) n.vals <- sapply(unique.vals,length) do.call(data.frame,unique.vals[n.vals==1]) } ### to hard-code label positions... static.labels <- function(x,y,groups,...){ L <- list(...) force(x) force(y) force(groups) function(d,...,axes2native){ native <- axes2native(data.frame(x,y)) L$x <- convertX(unit(native$x,"native"),"cm",valueOnly=TRUE) L$y <- convertY(unit(native$y,"native"),"cm",valueOnly=TRUE) L$groups <- groups
do.call(data.frame,L)
}
}

### Return the positions of the plot vertical limits in cm, for use as
### the limit argument to qp.labels.
ylimits <- function(...){
convertY(unit(c(0,1),"npc"),"cm",valueOnly=TRUE)
}

### Return the positions of the plot horizontal limits in cm, for use
### as the limit argument to qp.labels.
xlimits <- function(...){
convertX(unit(c(0,1),"npc"),"cm",valueOnly=TRUE)
}

reduce.cex <- structure(function
### If edges of the text are going out of the plotting
### region, then decrease cex until it fits. We call calc.boxes
### inside, so you should set cex before using this.
(sides
### string: lr (left and right) or tb (top and bottom).
){
if(sides=="lr"){
hi <- "right"
lo <- "left"
limits.fun <- xlimits
}else{
hi <- "top"
lo <- "bottom"
limits.fun <- ylimits
}
function(d,...){
d <- calc.boxes(d)
l <- limits.fun()
positive.part <- function(x)ifelse(x>0,x,0)
hi.pp <- positive.part(d[, hi]-l[2])
lo.pp <- positive.part(l[1]-d[, lo])
w <- d[, hi]-d[, lo]
if(is.null(d$cex)){ d$cex <- 1
}
d$cex <- (w-hi.pp)/w * (w-lo.pp)/w * d$cex
calc.boxes(d)
}
},ex=function(){

if(require(lars) && require(ggplot2)){
data(diabetes,package="lars",envir=environment())
X <- diabetes$x colnames(X) <- paste(colnames(X), colnames(X)) fit <- lars(X,diabetes$y,type="lasso")
beta <- scale(coef(fit),FALSE,1/fit$normx) arclength <- rowSums(abs(beta)) path.list <- list() for(variable in colnames(beta)){ standardized.coef <- beta[, variable] path.list[[variable]] <- data.frame(step=seq_along(standardized.coef), arclength, variable, standardized.coef) } path <- do.call(rbind, path.list) p <- ggplot(path,aes(arclength,standardized.coef,colour=variable))+ geom_line(aes(group=variable)) ## the legend isn't very helpful. print(p) ## add direct labels at the end of the lines. direct.label(p, "last.points") ## on my screen, some of the labels go off the end, so we can use ## this Positioning Method to reduce the text size until the labels ## are on the plot. direct.label(p, list("last.points",reduce.cex("lr"))) ## the default direct labels for lineplots are similar. direct.label(p) } }) ### If edges of the text are going left or right out of the plotting ### region, then decrease cex until it fits. reduce.cex.lr <- reduce.cex("lr") ### If edges of the text are going over the top or bottom of the ### plotting region, then decrease cex until it fits. reduce.cex.tb <- reduce.cex("tb") empty.grid <- function ### Label placement method for scatterplots that ensures labels are ### placed in different places. A grid is drawn over the whole ### plot. Each cluster is considered in sequence and assigned to the ### point on this grid which is closest to the point given by ### the input data points. Makes use of attr(d,"orig.data"). (d, ### Data frame of target points on the scatterplot for each label. debug=FALSE, ### Show debugging info on the plot? ... ### ignored. ){ NREP <- 10 orig <- attr(d,"orig.data") all.points <- orig[,c("x","y")] if(any(table(d$groups)>1))d <- get.means(d)
label.targets <- d
ranges <- list(x=convertX(unit(c(0,1),"npc"),"cm",valueOnly=TRUE),
y=convertY(unit(c(0,1),"npc"),"cm",valueOnly=TRUE))
gl <- function(v){
s <- seq(min(all.points[,v]),max(all.points[,v]),l=NREP)
if(expand){
dif <- s[2]-s[1]
s <- seq(min(ranges[[v]])-expand*dif,
max(ranges[[v]])+expand*dif,
l=NREP+2*expand)
}
list(centers=s,diff=s[2]-s[1])
}
hgrid <- function(x,w){
hboxes <- floor(diff(ranges[[x]])/r[,w])
(-expand:(hboxes+expand-1))*r[,w]+r[,w]/2+min(ranges[[x]])
}
if(debug)with(label.targets,{
grid.points(
x,y,default.units="cm",gp=gpar(col="green"),
name="directlabels.points.empty.grid.label.targets"
)
})
draw <- function(g){
gridlines <- with(g,list(x=unique(c(left,right)),y=unique(c(top,bottom))))
drawlines <- function(a,b,c,d,name)
grid.segments(a,b,c,d,"cm",name=name,gp=gpar(col="grey"))
with(gridlines,drawlines(
min(x),y,max(x),y,"directlabels.segments.empty.grid.vertical"))
with(gridlines,drawlines(
x,min(y),x,max(y),"directlabels.segments.empty.grid.horizontal"))
}
res <- data.frame()
label.targets <-
label.targets[order(nchar(as.character(label.targets$groups))),] for(v in label.targets$groups){
r <- label.targets[label.targets$groups==v,] no.points <- data.frame() expand <- 0 while(nrow(no.points)==0){ boxes <- if("left"%in%names(label.targets)){ list(x=hgrid("x","w"),y=hgrid("y","h"),w=r$w,h=r$h) }else{ L <- sapply(c("x","y"),gl,simplify=FALSE) list(x=L$x$centers,y=L$y$centers,w=L$x$diff,h=L$y$diff) } boxes <- calc.borders(do.call(expand.grid,boxes)) boxes <- cbind(boxes,data=inside(boxes,all.points)) no.points <- transform(subset(boxes,data==0)) expand <- expand+1 ## look further out if we can't find any labels inside } if(debug)draw(boxes) ## TDH 29 Aug 2012. For every box, figure out the class of the ## point which is its nearest neighbor. no.points$nearest <- NA
for(i in 1:nrow(no.points)){
b <- no.points[i,]
d.orig <- with(orig,(b$x-x)^2+(b$y-y)^2)
no.points[i,"nearest"] <- as.character(orig$groups[which.min(d.orig)]) } ## Only consider boxes that are closest to this class. closest <- no.points[no.points$nearest == rownames(r),]
if(nrow(closest) == 0){
closest <- no.points
}
closest$len <- with(closest,(r$x-x)^2+(r$y-y)^2) best <- closest[closest$len == min(closest$len), ][1, ] res <- rbind(res,transform(r,x=best$x,y=best\$y))
newpts <- with(best,{
expand.grid(x=seq(left,right,l=3),
y=seq(top,bottom,l=3))
})
all.points <- rbind(all.points,newpts)
}
if(debug)with(all.points,grid.points(
x,y,default.units="cm",name="directlabels.points.empty.grid.all.points"))
res
### Data frame with columns groups x y, 1 line for each group, giving
### the positions on the grid closest to each cluster.
}


Try the directlabels package in your browser

Any scripts or data that you put into this service are public.

directlabels documentation built on Jan. 16, 2021, 5:05 p.m.