demo/anscombe.R

### Pairs vs zen plot for the Anscombe's quartet dataset

require(zenplots)

## Prepare the data (we scale x's and y's separately here)
x <- anscombe
xran <- range(x[,1:4])
x[,1:4] <- (x[,1:4]-xran[1])/diff(xran) # scale all x
yran <- range(x[,5:8])
x[,5:8] <- (x[,5:8]-yran[1])/diff(yran) # scale all y

## Pairs plot
pairs(x, gap=0) # doesn't make much sense here (e.g., what shall 'x1 maps to y3' mean?)

## We choose a path through all pairs of variables such that only pairs with
## equal number (1--4) or with equal label ('x' or 'y') appear
ord <- c(1, 5, 6, 2, 3, 7, 8, 4) # path through the variables
## Sorting idea:
## (x,y) appear as they are visible in the zen plot drawn, but we order the
## variables such that:
## a) If there is precisely one x* and one y* variable, we view the pair
##    as a function in the variable x*
## b) If there is no x* variable (two y* variables) or two x* variables
##    (no y* variable), we view the pair as a function in the first variable

## Zen plot with graphics
zenplot(x[,ord], scale="none", # important here
        plot1d="label",
        plot2d=function(x, type="b", plotID, ...) {
            ## Sort the data (as the data set is not sorted)
            is.x <- grepl("x", plotID$name) # logical indicating whether label contains x
            num.x.lab <- sum(is.x) # number of x labels (0, 1 or 2)
            if(num.x.lab==0 || num.x.lab==2) { # both variables are x* or y*
                ix <- 1 # first index
                iy <- NULL # dummy
            } else { # there is precisely one x* and one y* variable
                ix <- which(is.x) # index of the variable named x*
                if(ix == 1) iy <- 2 else iy <- 1
            }
            x <- x[order(x[,ix]),] # sort data in increasing x order
            ## Now plot
            axes_2d_graphics(x=x, ...)
            points_2d_graphics(x=x, type=type, ..., add=TRUE)
            label_2d_graphics(x=x, plotID=plotID, add=TRUE, ...)
            ## If there are both x and y variables, plot a regression line
            if(!is.null(iy)){ # or plotID$plotNo %% 2 == 1 or num.x.lab == 1
                if(ix == 1) { # (x,y) plot
                    abline(lm(x[,2]~x[,1]), col="royalblue3")
                } else { # (y,x) plot; idea: fit regression line to (x,y) and then invert the line
                    lm.coeff <- lm(x[,1]~x[,2])$coefficients
                    intercept <- lm.coeff[1]
                    slope <- lm.coeff[2]
                    abline(a=-intercept/slope, b=1/slope, col="royalblue3") # plot inverted line
                }
            }
        })

## Zen plot with grid
require(grid)
zenplot(x[,ord], scale="none", # important here
        pkg="grid", plot1d="label",
        plot2d=function(x, plotID, draw=FALSE, ...) {
            ## Sort the data (as the data set is not sorted)
            is.x <- grepl("x", plotID$name) # logical indicating whether label contains x
            num.x.lab <- sum(is.x) # number of x labels (0, 1 or 2)
            if(num.x.lab==0 || num.x.lab==2) { # both variables are x* or y*
                ix <- 1 # first index
                iy <- NULL # dummy
            } else { # there is precisely one x* and one y* variable
                ix <- which(is.x) # index of the variable named x*
                if(ix == 1) iy <- 2 else iy <- 1
            }
            x <- x[order(x[,ix]),] # sort data in increasing x order
            ## Now plot
            gAxes <- axes_2d_grid(draw=draw, ...)
            gPoints <- points_2d_grid(x=x, type="o", draw=draw, ...)
            gLabels <- label_2d_grid(x=x, plotID=plotID, draw=draw, loc.y=0.01, ...)
            ## If there are both x and y variables, plot a regression line
            gLine <- if(!is.null(iy)){ # or plotID$plotNo %% 2 == 1 or num.x.lab == 1
                if(ix == 1) { # (x,y) plot
                    lm.coeff <- lm(x[,2]~x[,1])$coefficients
                    intercept <- lm.coeff[1]
                    slope <- lm.coeff[2]
                    functionGrob(function(x) list(x=x, y=slope*x+intercept),
                                 range=c(-0.02, 1),
                                 gp=gpar(col="royalblue3", ...))
                } else { # (y,x) plot; idea: fit regression line to (x,y) and then invert the line
                    lm.coeff <- lm(x[,1]~x[,2])$coefficients
                    intercept <- lm.coeff[1]
                    slope <- lm.coeff[2]
                    functionGrob(function(x) list(x=x, y=(1/slope)*x-intercept/slope),
                                 range=c(intercept-0.02*slope, # where the line hits the x axis (note: it's at y=-0.02)
                                         1), gp=gpar(col="royalblue3", ...)) # grob for inverted line
                }
            } else nullGrob()
            ## Put everything together
            gTree(children=gList(gAxes, gPoints, gLabels, gLine))
        })

## Individual plots to check correctness of the zen plot
if(FALSE) {
    plot(x[order(x[,1]),c(1,5)], type="b", xlim=c(0,1), ylim=c(0,1)) # x1 maps to y1; order according to x1
    plot(x[order(x[,6]),c(6,5)], type="b", xlim=c(0,1), ylim=c(0,1)) # y2 maps to y1; order according to y2
    plot(x[order(x[,2]),c(6,2)], type="b", xlim=c(0,1), ylim=c(0,1)) # y2 maps to x2; order according to x2
    plot(x[order(x[,3]),c(3,2)], type="b", xlim=c(0,1), ylim=c(0,1)) # x3 maps to x2; order according to x3
    plot(x[order(x[,3]),c(3,7)], type="b", xlim=c(0,1), ylim=c(0,1)) # x3 maps to y3; order according to x3
    plot(x[order(x[,8]),c(8,7)], type="b", xlim=c(0,1), ylim=c(0,1)) # y4 maps to y3; order according to y4
    plot(x[order(x[,4]),c(8,4)], type="b", xlim=c(0,1), ylim=c(0,1)) # y4 maps to x4; order according to x4
}

Try the zenplots package in your browser

Any scripts or data that you put into this service are public.

zenplots documentation built on May 2, 2019, 4:34 p.m.