#' Count Neighbors within a Radius
#'
#' This function counts the number of neighboring points within a specified
#' radius for each point in a given set of coordinates using a C implementation.
#'
#' @param x A numeric vector of x-coordinates of the points.
#' @param y A numeric vector of y-coordinates of the points.
#' @param r2 A numeric value representing the squared radius within which to
#' search for neighboring points.
#' @param xy A numeric value representing the aspect ratio (usually the ratio of
#' the y-scale to the x-scale).
#' @return A numeric vector where each element represents the count of
#' neighboring points within the specified radius for each point.
count_neighbors <- function(x, y, r2, xy) {
.Call("count_neighbors_", x, y, r2, xy, "ggpointdensity")
}
# Equivalent R code for reference:
# count_neighbors_r <- function(x, y, r2, xy) {
# yx <- 1 / xy
# sapply(1:length(x), function(i) {
# sum((yx * (x[i] - x)^2) + (xy * (y[i] - y)^2) < r2)
# })
# }
#' @inherit geom_pointdensity
#' @param geom The geometric object to use to display the data for this layer,
#' defaults to "point".
#'
#' @author Lukas PM Kremer & Simon Anders
#' @seealso You can find examples and demo plots at
#' <https://github.com/LKremer/ggpointdensity>
#'
#' @export
stat_pointdensity <- function(
mapping = NULL,
data = NULL,
geom = "point",
position = "identity",
...,
adjust = 1,
na.rm = FALSE,
method = c("auto", "kde2d", "neighbors"),
method.args = list(),
show.legend = NA,
inherit.aes = TRUE
) {
method <- match.arg(method)
layer(
data = data,
mapping = mapping,
stat = StatPointdensity,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
adjust = adjust,
na.rm = na.rm,
method = method,
method.args = method.args,
...
)
)
}
StatPointdensity <- ggproto(
"StatPointdensity",
Stat,
default_aes = aes(color = stat(density)),
required_aes = c("x", "y"),
compute_layer = function(self, data, params, layout) {
# This function mostly copied from ggplot2's Stat
ggplot2:::check_required_aesthetics(
self$required_aes,
c(names(data), names(params)),
ggplot2:::snake_class(self)
)
# Make sure required_aes consists of the used set of aesthetics in case of
# "|" notation in self$required_aes
required_aes <- intersect(
names(data),
unlist(strsplit(self$required_aes, "|", fixed = TRUE))
)
data <- ggplot2:::remove_missing(
data,
params$na.rm,
c(required_aes, self$non_missing_aes),
ggplot2:::snake_class(self),
finite = FALSE # Note that in ggplot2's Stat this is TRUE
)
# Trim off extra parameters
params <- params[intersect(names(params), self$parameters())]
args <- c(list(data = quote(data), scales = quote(scales)), params)
ggplot2:::dapply(data, "PANEL", function(data) {
scales <- layout$get_scales(data$PANEL[1])
tryCatch(do.call(self$compute_panel, args), error = function(e) {
warning(glue::glue(
"Computation failed in `{ggplot2:::snake_class(self)}()`:\n{e$message}"
))
vctrs:::new_data_frame()
})
})
},
setup_params = function(data, params) {
if (identical(params$method, "auto")) {
# Use default nn correction for small datasets, kde2d for
# larger. Based on size of the _largest_ group.
max_group <- max(table(interaction(data$group, data$PANEL, drop = TRUE)))
if (max_group > 20000) {
message(paste0(
"geom_pointdensity() using method='kde2d' due to large number of ",
"points (>20k). You may override this by setting method='neighbors'."
))
params$method <- "kde2d"
} else {
params$method <- "neighbors"
}
}
params
},
compute_group = function(
data,
scales,
adjust = 1,
method = "auto",
method.args = list()
) {
finites <- is.finite(data$x) & is.finite(data$y)
if (nrow(data[finites, ]) == 1) {
data$density <- NA
} else if (identical(method, "neighbors")) {
# find an appropriate bandwidth (radius), pretty ad-hoc:
xrange <- diff(scales$x$get_limits()) * adjust
yrange <- diff(scales$y$get_limits()) * adjust
r2 <- (xrange + yrange) / 70
# since x and y may be on different scales, we need a
# factor to weight x and y distances accordingly:
xy <- xrange / yrange
# counting the number of neighbors around each point,
# this will be used to color the points
data$density <- count_neighbors(
data$x,
data$y,
r2 = r2,
xy = xy
)
} else if (identical(method, "kde2d")) {
ddata <- data[finites, ]
base.args <- list(
x = ddata$x,
y = ddata$y,
lims = c(scales$x$dimension(), scales$y$dimension())
)
if (!is.element("n", names(method.args))) {
method.args["n"] <- 100
}
if (!is.element("h", names(method.args))) {
h <- c(MASS::bandwidth.nrd(ddata$x), MASS::bandwidth.nrd(ddata$y))
h <- pmax(sqrt(.Machine$double.eps), h)
method.args$h <- h * adjust
}
dens <- do.call(MASS::kde2d, c(base.args, method.args))
# credits to Kamil Slowikowski:
ix <- findInterval(data$x, dens$x)
iy <- findInterval(data$y, dens$y)
ii <- cbind(ix, iy)
data$density[finites] <- dens$z[ii]
data$density <- pmax(data$density, .Machine$double.eps, na.rm = TRUE)
} else {
if (is.character(method)) {
method <- match.fun(method)
}
data$density <- do.call(method, c(method.args))
}
data$ndensity <- data$density / max(data$density)
data
}
)
#' A cross between a scatter plot and a 2D density plot
#'
#' @description `geom_pointdensity()` visualizes overlapping data points on a 2D
#' coordinate system. It combines the benefits of
#' [`geom_point()`][ggplot2::geom_point()],
#' [`geom_density2d()`][ggplot2::geom_density2d()], and
#' [`geom_bin2d()`][ggplot2::geom_bin2d()] by coloring individual points based
#' on the density of neighboring points. This approach highlights the overall
#' data distribution while preserving the visibility of individual outliers,
#' making it ideal for data exploration.
#'
#' @inheritParams ggplot2::geom_point
#' @param method Density estimation method. Options are `"auto"`, `"neighbors"`,
#' or `"kde2d"`.
#'
#' - `"auto"` (default): Selects the appropriate method based on the number of
#' points. `"neighbors"` is faster for small datasets, while `"kde2d"` is more
#' efficient for large datasets.
#' - `"neighbors"`: Determines an appropriate radius and counts the number of
#' points within this radius for each point.
#' - `"kde2d"`: Uses 2D kernel density estimation via [MASS::kde2d()].
#' Additional arguments can be provided through `method.args`.
#' @param method.args List of additional arguments passed on to the density
#' estimation function defined by `method` (e.g. [MASS::kde2d()]).
#' @param adjust Multiplicative bandwidth adjustment for density estimation. A
#' value less than 1 (e.g., `adjust = 0.1`) yields a smoother density
#' estimate, while a value greater than 1 (e.g., `adjust = 5`) increases the
#' level of visible detail.
#'
#' @inheritSection ggplot2::geom_point Aesthetics
#'
#' @author Lukas PM Kremer & Simon Anders
#' @seealso You can find examples and demo plots at
#' <https://github.com/LKremer/ggpointdensity>
#'
#' @export
#'
#' @examples
#' library(ggpointdensity)
#' library(ggplot2)
#' library(dplyr)
#'
#' # generate some toy data
#' dat <- bind_rows(
#' tibble(x = rnorm(7000, sd = 1),
#' y = rnorm(7000, sd = 10),
#' group = "foo"),
#' tibble(x = rnorm(3000, mean = 1, sd = .5),
#' y = rnorm(3000, mean = 7, sd = 5),
#' group = "bar"))
#'
#' # plot it with geom_pointdensity()
#' ggplot(data = dat, mapping = aes(x = x, y = y)) +
#' geom_pointdensity()
#'
#' # adjust the smoothing bandwidth,
#' # i.e. the radius around the points
#' # in which neighbors are counted
#' ggplot(data = dat, mapping = aes(x = x, y = y)) +
#' geom_pointdensity(adjust = .1)
#'
#' ggplot(data = dat, mapping = aes(x = x, y = y)) +
#' geom_pointdensity(adjust = 4)
#'
#' ggplot(data = dat, mapping = aes(x = x, y = y)) +
#' geom_pointdensity(adjust = 4) +
#' scale_colour_continuous(low = "red", high = "black")
#'
#' # I recommend the viridis package
#' # for a more useful color scale
#' library(viridis)
#' ggplot(data = dat, mapping = aes(x = x, y = y)) +
#' geom_pointdensity() +
#' scale_color_viridis()
#'
#' # Of course you can combine the geom with standard
#' # ggplot2 features such as facets...
#' ggplot(data = dat, mapping = aes(x = x, y = y)) +
#' geom_pointdensity() +
#' scale_color_viridis() +
#' facet_wrap(~ group)
#'
#' # ... or point shape and size:
#' dat_subset <- sample_frac(dat, .1) #' smaller data set
#' ggplot(data = dat_subset, mapping = aes(x = x, y = y)) +
#' geom_pointdensity(size = 3, shape = 17) +
#' scale_color_viridis()
#'
#' # Zooming into the axis works as well, keep in mind
#' # that xlim() and ylim() affect the density since they
#' # remove data points.
#' # It may be better to use coord_cartesian() instead.
#' ggplot(data = dat, mapping = aes(x = x, y = y)) +
#' geom_pointdensity() +
#' scale_color_viridis() +
#' xlim(c(-1, 3)) + ylim(c(-5, 15))
#'
#' ggplot(data = dat, mapping = aes(x = x, y = y)) +
#' geom_pointdensity() +
#' scale_color_viridis() +
#' coord_cartesian(xlim = c(-1, 3), ylim = c(-5, 15))
geom_pointdensity <- function(
mapping = NULL,
data = NULL,
stat = "pointdensity",
position = "identity",
...,
method = c("auto", "kde2d", "neighbors"),
method.args = list(),
adjust = 1,
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE
) {
method <- match.arg(method)
ggplot2::layer(
data = data,
mapping = mapping,
stat = stat,
geom = ggplot2::GeomPoint,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
method = method,
method.args = method.args,
adjust = adjust,
na.rm = na.rm,
...
)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.