# Figure defaults applied to every knitr chunk rendered after this point.
knitr::opts_chunk$set(
  fig.retina = 2,
  fig.width = 6,
  fig.height = 4
)
Beeswarm plots (aka column scatter plots or violin scatter plots) are a way of plotting points that would ordinarily overlap so that they fall next to each other instead. In addition to reducing overplotting, they help visualize the density of the data at each point (similar to a violin plot), while still showing each data point individually.
ggbeeswarm
provides two different methods to create beeswarm-style plots using ggplot2. It does this by adding two new ggplot geom objects:
geom_quasirandom
: Uses a van der Corput sequence or Tukey texturing (Tukey and Tukey "Strips displaying empirical distributions: I. textured dot strips") to space the dots to avoid overplotting. This uses sherrillmix/vipor.
geom_beeswarm
: Uses the beeswarm library to do point-size based offset.
Features:
- Automatic dodging when `dodge.width` is specified (thanks @josesho)

See the examples below.
This package is on CRAN, so installation should be as simple as:
# Install the released version from CRAN.
install.packages("ggbeeswarm")
If you want the development version from GitHub, you can do:
# Install the development version straight from the GitHub repository.
devtools::install_github(repo = "eclarke/ggbeeswarm")
Here is a comparison between geom_jitter
and geom_quasirandom
on the iris
dataset:
# Fix the RNG seed so the randomly placed points are reproducible.
set.seed(12345)
library(ggplot2)
library(ggbeeswarm)

# Compare to jitter: same data and aesthetics, only the geom differs.
ggplot(iris, aes(Species, Sepal.Length)) +
  geom_jitter()
ggplot(iris, aes(Species, Sepal.Length)) +
  geom_quasirandom()
Using geom_quasirandom
:
# Default geom_quasirandom
ggplot(mpg, aes(class, hwy)) +
  geom_quasirandom()

# With categorical y-axis
ggplot(mpg, aes(hwy, class)) +
  geom_quasirandom()

# Some groups may have only a few points.
# Use `varwidth=TRUE` to adjust width dynamically.
ggplot(mpg, aes(class, hwy)) +
  geom_quasirandom(varwidth = TRUE)

# Automatic dodging
# Keep only three vehicle classes so the dodged groups stay readable;
# `sub_mpg` is reused by the geom_beeswarm dodging example further down.
sub_mpg <- mpg[mpg$class %in% c("midsize", "pickup", "suv"), ]
ggplot(sub_mpg, aes(class, displ, color = factor(cyl))) +
  geom_quasirandom(dodge.width = 1)
geom_quasirandom
can also use several other methods to distribute points. For example:
# One plot per point-distribution method, each titled with the method shown.
ggplot(iris, aes(Species, Sepal.Length)) +
  geom_quasirandom(method = "tukey") +
  ggtitle("Tukey texture")
ggplot(iris, aes(Species, Sepal.Length)) +
  geom_quasirandom(method = "tukeyDense") +
  ggtitle("Tukey + density")
ggplot(iris, aes(Species, Sepal.Length)) +
  geom_quasirandom(method = "frowney") +
  ggtitle("Banded frowns")
ggplot(iris, aes(Species, Sepal.Length)) +
  geom_quasirandom(method = "smiley") +
  ggtitle("Banded smiles")
ggplot(iris, aes(Species, Sepal.Length)) +
  geom_quasirandom(method = "pseudorandom") +
  ggtitle("Jittered density")

# geom_beeswarm on the same data, for comparison with the methods above.
ggplot(iris, aes(Species, Sepal.Length)) +
  geom_beeswarm() +
  ggtitle("Beeswarm")
Using geom_beeswarm
:
# Default geom_beeswarm, then the same plot with `side = 1L`.
ggplot(iris, aes(Species, Sepal.Length)) +
  geom_beeswarm()
ggplot(iris, aes(Species, Sepal.Length)) +
  geom_beeswarm(side = 1L)
ggplot(mpg, aes(class, hwy)) +
  geom_beeswarm(size = 0.5)

# With categorical y-axis
ggplot(mpg, aes(hwy, class)) +
  geom_beeswarm(size = 0.5)

# Also watch out for points escaping from the plot with geom_beeswarm
ggplot(mpg, aes(hwy, class)) +
  geom_beeswarm(size = 0.5) +
  scale_y_discrete(expand = expansion(add = c(0.5, 1)))
ggplot(mpg, aes(class, hwy)) +
  geom_beeswarm(size = 1.1)

# With automatic dodging
# `sub_mpg` is the midsize/pickup/suv subset of `mpg` built in the
# geom_quasirandom dodging example.
ggplot(sub_mpg, aes(class, displ, color = factor(cyl))) +
  geom_beeswarm(dodge.width = 0.5)
# A single group ("A") of 200 integers drawn with replacement from 1..100,
# used to compare the geom_beeswarm `method` options side by side.
df <- data.frame(
  x = "A",
  y = sample(1:100, 200, replace = TRUE)
)

ggplot(df, aes(x = x, y = y)) +
  geom_beeswarm(cex = 2.5, method = "swarm") +
  ggtitle('method = "swarm" (default)')
ggplot(df, aes(x = x, y = y)) +
  geom_beeswarm(cex = 2.5, method = "compactswarm") +
  ggtitle('method = "compactswarm"')
ggplot(df, aes(x = x, y = y)) +
  geom_beeswarm(cex = 2.5, method = "hex") +
  ggtitle('method = "hex"')
ggplot(df, aes(x = x, y = y)) +
  geom_beeswarm(cex = 2.5, method = "square") +
  ggtitle('method = "square"')
ggplot(df, aes(x = x, y = y)) +
  geom_beeswarm(cex = 2.5, method = "center") +
  ggtitle('method = "center"')
# With different beeswarm point distribution priority
# Three groups (x = 1, 2, 3) of 20, 40 and 80 points; y is normally
# distributed around each group's x value.
dat <- data.frame(x = rep(1:3, c(20, 40, 80)))
dat$y <- rnorm(nrow(dat), dat$x)

ggplot(dat, aes(x, y)) +
  geom_beeswarm(cex = 2) +
  ggtitle("Default (ascending)") +
  scale_x_continuous(expand = expansion(add = c(0.5, 0.5)))
ggplot(dat, aes(x, y)) +
  geom_beeswarm(cex = 2, priority = "descending") +
  ggtitle("Descending") +
  scale_x_continuous(expand = expansion(add = c(0.5, 0.5)))
ggplot(dat, aes(x, y)) +
  geom_beeswarm(cex = 2, priority = "density") +
  ggtitle("Density") +
  scale_x_continuous(expand = expansion(add = c(0.5, 0.5)))
ggplot(dat, aes(x, y)) +
  geom_beeswarm(cex = 2, priority = "random") +
  ggtitle("Random") +
  scale_x_continuous(expand = expansion(add = c(0.5, 0.5)))
# Reseed so the simulated data for the corral examples is reproducible.
set.seed(1995)

# 1000 standard-normal values randomly assigned to three groups.
df2 <- data.frame(
  y = rnorm(1000),
  id = sample(c("G1", "G2", "G3"), size = 1000, replace = TRUE)
)

# Shared base plot; each example below adds a different `corral` setting.
p <- ggplot(df2, aes(x = id, y = y, colour = id))

# use corral.width to control corral width
p +
  geom_beeswarm(cex = 2.5, corral = "none", corral.width = 0.9) +
  ggtitle('corral = "none" (default)')
p +
  geom_beeswarm(cex = 2.5, corral = "gutter", corral.width = 0.9) +
  ggtitle('corral = "gutter"')
p +
  geom_beeswarm(cex = 2.5, corral = "wrap", corral.width = 0.9) +
  ggtitle('corral = "wrap"')
p +
  geom_beeswarm(cex = 2.5, corral = "random", corral.width = 0.9) +
  ggtitle('corral = "random"')
p +
  geom_beeswarm(cex = 2.5, corral = "omit", corral.width = 0.9) +
  ggtitle('corral = "omit"')
Authors: Erik Clarke, Scott Sherrill-Mix, and Charlotte Dawson
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.