#' Number of observations in a plot panel
#'
#' \code{stat_panel_counts()} counts the number of observations in each panel.
#' \code{stat_group_counts()} counts the number of observations in each group.
#' By default they add one or more text labels to the top right corner of each
#' panel. Grouping is ignored by \code{stat_panel_counts()}. If no grouping
#' exists, the two statistics behave similarly.
#'
#' @param mapping The aesthetic mapping, usually constructed with
#' \code{\link[ggplot2]{aes}} or \code{\link[ggplot2]{aes_}}. Only needs to be
#' set at the layer level if you are overriding the plot defaults.
#' @param data A layer specific dataset. Rarely used, as you will not want to
#' override the plot defaults.
#' @param geom The geometric object to use display the data
#' @param position The position adjustment to use on this layer
#' @param show.legend logical. Should this layer be included in the legends?
#' \code{NA}, the default, includes it if any aesthetics are mapped.
#' \code{FALSE} never includes, and \code{TRUE} always includes.
#' @param inherit.aes If \code{FALSE}, overrides the default aesthetics, rather
#' than combining with them. This is most useful for helper functions that
#' define both data and aesthetics and should not inherit behaviour from the
#' default plot specification, e.g., \code{\link[ggplot2]{borders}}.
#' @param ... other arguments passed on to \code{\link[ggplot2]{layer}}. This
#' can include aesthetics whose values you want to set, not map. See
#' \code{\link[ggplot2]{layer}} for more details.
#' @param na.rm a logical indicating whether \code{NA} values should be stripped
#' before the computation proceeds.
#' @param label.x,label.y \code{numeric} Coordinates (in npc units) to be used
#' for absolute positioning of the labels.
#'
#' @details These statistics can be used to automatically count observations in
#' each panel of a plot, and by default add these counts as text labels. These
#' statistics, unlike \code{stat_quadrant_counts()} requires only one of
#' \emph{x} or \emph{y} aesthetics and can be used together with statistics
#' that have the same requirement, like \code{stat_density()}.
#'
#' The default position of the label is in the top right corner. When using
#' facets even with free limits for \emph{x} and \emph{y} axes, the location
#' of the labels is consistent across panels. This is achieved by use of
#' \code{geom = "text_npc"} or \code{geom = "label_npc"}. To pass the
#' positions in native data units to \code{label.x} and \code{label.y}, pass
#' also explicitly \code{geom = "text"}, \code{geom = "label"} or some other
#' geometry that use the \emph{x} and/or \emph{y} aesthetics. A vector with
#' the same length as the number of panels in the figure can be used if
#' needed.
#'
#' @section Computed variables: Data frame with one or more rows, one for each
#' group of observations for which counts are counted in \code{data}. \describe{
#' \item{x,npcx}{x value of label position in data- or npc units, respectively}
#' \item{y,npcy}{y value of label position in data- or npc units, respectively}
#' \item{count}{number of observations as an integer}
#' \item{count.label}{number of observations as character}}
#'
#' As shown in one example below \code{\link[gginnards]{geom_debug}} can be
#' used to print the computed values returned by any statistic. The output
#' shown includes also values mapped to aesthetics, like \code{label} in the
#' example. \code{x} and \code{y} are included in the output only if mapped.
#'
#' @note If a factor is mapped to \code{x} or to \code{y} aesthetics each level
#' of the factor constitutes a group, in this case the default positioning and
#' geom using NPC pseudo aesthetics will have to be overriden by passing
#' \code{geom = "text"} and data coordinates used. The default for factors
#' may change in the future.
#'
#' @return A plot layer instance. Using as output \code{data} the counts of
#' observations in each plot panel or per group in each plot panel.
#'
#' @family Functions for quadrant and volcano plots
#'
#' @export
#'
#' @examples
#'
#' # generate artificial data with numeric x and y
#' set.seed(67821)
#' x <- 1:100
#' y <- rnorm(length(x), mean = 10)
#' group <- factor(rep(c("A", "B"), times = 50))
#' my.data <- data.frame(x, y, group)
#'
#' # using automatically generated text labels
#'
#' ggplot(my.data, aes(x, y)) +
#' geom_point() +
#' stat_panel_counts()
#'
#' ggplot(my.data, aes(x, y, colour = group)) +
#' geom_point() +
#' stat_panel_counts()
#'
#' ggplot(my.data, aes(x, y, colour = group)) +
#' geom_point() +
#' stat_group_counts()
#'
#' ggplot(my.data, aes(x, y, colour = group)) +
#' geom_point() +
#' stat_group_counts(label.x = "left", hstep = 0.06, vstep = 0)
#'
#' ggplot(my.data, aes(x, y, colour = group)) +
#' geom_point() +
#' stat_group_counts(aes(label = after_stat(pc.label)))
#'
#' ggplot(my.data, aes(x, y, colour = group)) +
#' geom_point() +
#' stat_group_counts(aes(label = after_stat(pc.label)), digits = 3)
#'
#' ggplot(my.data, aes(x, y, colour = group)) +
#' geom_point() +
#' stat_group_counts(aes(label = after_stat(fr.label)))
#'
#' ggplot(my.data, aes(x, y, colour = group)) +
#' geom_point() +
#' stat_group_counts(aes(label = after_stat(dec.label)))
#'
#' # one of x or y can be a factor
#' # label.x or label.y along the factor can be set to "factor" together
#' # with the use of geom_text()
#'
#' ggplot(mpg,
#' aes(factor(cyl), hwy)) +
#' stat_boxplot() +
#' stat_group_counts(geom = "text",
#' label.y = 10,
#' label.x = "factor") +
#' stat_panel_counts()
#'
#' # Numeric values can be used to build labels with alternative formats
#' # Here with sprintf(), but paste() and format() also work.
#'
#' ggplot(my.data, aes(x, y)) +
#' geom_point() +
#' stat_panel_counts(aes(label = sprintf("%i observations",
#' after_stat(count)))) +
#' scale_y_continuous(expand = expansion(mult = c(0.05, 0.12)))
#'
#' ggplot(mpg,
#' aes(factor(cyl), hwy)) +
#' stat_boxplot() +
#' stat_group_counts(geom = "text",
#' aes(label = sprintf("(%i)", after_stat(count))),
#' label.y = 10,
#' label.x = "factor")
#'
#' ggplot(mpg,
#' aes(factor(cyl), hwy)) +
#' stat_boxplot() +
#' stat_group_counts(aes(label = sprintf("n[%i]~`=`~%i",
#' after_stat(x), after_stat(count))),
#' parse = TRUE,
#' geom = "text",
#' label.y = 10,
#' label.x = "factor") +
#' stat_panel_counts(aes(label = sprintf("sum(n[i])~`=`~%i",
#' after_stat(count))),
#' parse = TRUE)
#'
#' # label position
#'
#' ggplot(my.data, aes(y)) +
#' stat_panel_counts(label.x = "left") +
#' stat_density(alpha = 0.5)
#'
#' ggplot(my.data, aes(y, colour = group)) +
#' stat_group_counts(label.y = "top") +
#' stat_density(aes(fill = group), alpha = 0.3)
#'
#' # The numeric value can be used as a label as is
#'
#' ggplot(mpg,
#' aes(factor(cyl), hwy)) +
#' stat_boxplot() +
#' stat_group_counts(geom = "text",
#' aes(label = after_stat(count)),
#' label.x = "factor",
#' label.y = 10) +
#' annotate(geom = "text", x = 0.55, y = 10, label = "n[i]~`=`", parse = TRUE)
#'
#' # We use geom_debug() to see the computed values
#'
#' gginnards.installed <- requireNamespace("gginnards", quietly = TRUE)
#' if (gginnards.installed) {
#' library(gginnards)
#'
#' ggplot(my.data, aes(x, y)) +
#' geom_point() +
#' stat_panel_counts(geom = "debug")
#' }
#'
#' if (gginnards.installed) {
#' ggplot(my.data, aes(x, y, colour = group)) +
#' geom_point() +
#' stat_group_counts(geom = "debug")
#' }
#'
stat_panel_counts <- function(mapping = NULL,
data = NULL,
geom = "text_npc",
position = "identity",
label.x = "right",
label.y = "top",
na.rm = FALSE,
show.legend = FALSE,
inherit.aes = TRUE,
...) {
stopifnot(is.null(label.x) || is.numeric(label.x) || is.character(label.x))
stopifnot(is.null(label.y) || is.numeric(label.y) || is.character(label.y))
ggplot2::layer(
stat = StatPanelCounts,
data = data,
mapping = mapping,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm,
label.x = label.x,
label.y = label.y,
npc.used = grepl("_npc", geom),
...)
)
}
#' @rdname ggpp-ggproto
#' @format NULL
#' @usage NULL
#' @export
StatPanelCounts <-
ggplot2::ggproto("StatPanelCounts", ggplot2::Stat,
compute_panel = function(data,
scales,
label.x,
label.y,
npc.used) {
force(data)
# total count (works because NAs have been already removed)
z <- tibble::tibble(count = nrow(data))
# label position
if (is.character(label.x)) {
if (npc.used) {
margin.npc <- 0.05
} else {
# margin set by scale
margin.npc <- 0
}
label.x <- compute_npcx(x = label.x)
if (!npc.used) {
if ("x" %in% colnames(data)) {
x.expanse <- abs(diff(range(data$x)))
x.min <- min(data$x)
label.x <- label.x * x.expanse + x.min
} else {
if (data$PANEL[1] == 1L) { # show only once
message("No 'x' mapping; 'label.x' requires a numeric argument in data units")
}
label.x <- NA_real_
}
}
}
if (is.character(label.y)) {
if (npc.used) {
margin.npc <- 0.05
} else {
# margin set by scale
margin.npc <- 0
}
label.y <- compute_npcy(y = label.y)
if (!npc.used) {
if ("y" %in% colnames(data)) {
y.expanse <- abs(diff(range(data$y)))
y.min <- min(data$y)
label.y <- label.y * y.expanse + y.min
} else {
if (data$PANEL[1] == 1L) { # show only once
message("No 'y' mapping; 'label.y' requires a numeric argument in data units")
}
label.y <- NA_real_
}
}
}
if (npc.used) {
z$npcx <- label.x
z$x <- NA_real_
z$npcy <- label.y
z$y <- NA_real_
} else {
z$x <- label.x
z$npcx <- NA_real_
z$y <- label.y
z$npcy <- NA_real_
}
z$count.label <- sprintf("n=%i", z$count)
z
},
default_aes =
ggplot2::aes(npcx = ggplot2::after_stat(npcx),
npcy = ggplot2::after_stat(npcy),
label = ggplot2::after_stat(count.label),
hjust = "inward",
vjust = "inward"),
required_aes = c("x|y")
)
#' @rdname stat_panel_counts
#'
#' @param hstep,vstep numeric in npc units, the horizontal and vertical step
#' used between labels for different groups.
#' @param digits integer Number of digits for fraction and percent labels.
#'
#' @export
#'
stat_group_counts <- function(mapping = NULL,
data = NULL,
geom = "text_npc",
position = "identity",
label.x = "right",
label.y = "top",
hstep = 0,
vstep = NULL,
digits = 2,
na.rm = FALSE,
show.legend = FALSE,
inherit.aes = TRUE,
...) {
stopifnot(is.null(label.x) || is.numeric(label.x) || is.character(label.x))
stopifnot(is.null(label.y) || is.numeric(label.y) || is.character(label.y))
ggplot2::layer(
stat = StatGroupCounts,
data = data,
mapping = mapping,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm,
label.x = label.x,
label.y = label.y,
digits = digits,
hstep = hstep,
vstep = ifelse(is.null(vstep),
ifelse(grepl("label", geom),
0.10,
0.05),
vstep),
npc.used = grepl("_npc", geom),
...)
)
}
#' @rdname ggpp-ggproto
#' @format NULL
#' @usage NULL
#' @export
StatGroupCounts <-
ggplot2::ggproto("StatGroupCounts", ggplot2::Stat,
compute_panel = function(data,
params,
scales,
label.x,
label.y,
digits = 2,
vstep,
hstep,
npc.used) {
force(data)
# Build group labels
if (exists("grp.label", data)) {
if (length(unique(data[["grp.label"]])) > 1L) {
warning("Non-unique value in 'data$grp.label' using group index ", data[["group"]][1], " as label.")
grp.label <- as.character(unique(data[["group"]]))
} else {
grp.label <- unique(data[["grp.label"]])
}
} else {
# if nothing mapped to grp.label we use group index as label
grp.label <- as.character(unique(data[["group"]]))
}
# validate label positions
group.idx <- abs(unique(data$group))
if (length(label.x) >= length(group.idx)) {
label.x <- label.x[1:length(group.idx)]
} else if (length(label.x) > 0) {
label.x <- label.x[1]
}
if (length(label.y) >= length(group.idx)) {
label.y <- label.y[1:length(group.idx)]
} else if (length(label.y) > 0) {
label.y <- label.y[1]
}
# Compute number of observations
# (works because NAs have been already removed)
z <- data.frame()
for (g in unique(data$group)) {
temp <- cbind(data[data$group == g, ][ 1, ], count = sum(data$group == g))
z <- rbind(z, temp)
}
z$total <- nrow(data)
z$count.label <- sprintf("n=%i", z$count)
z$pc.label <- sprintf("p=%.*f%%",
digits - 2,
z$count / z$total * 100)
z$dec.label <- sprintf("f=%.*f",
digits,
z$count / z$total)
z$fr.label <- sprintf("%i / %i",
z$count, z$total)
# Compute label positions
if (is.character(label.x)) {
if (label.x[1] == "factor") {
label.x <- z$group
} else {
if (npc.used) {
margin.npc <- 0.05
} else {
# margin set by scale
margin.npc <- 0
}
label.x <- compute_npcx(x = label.x, group = group.idx, h.step = hstep,
margin.npc = margin.npc)
if (!npc.used) {
if ("x" %in% colnames(data)) {
x.expanse <- abs(diff(range(data$x)))
x.min <- min(data$x)
label.x <- label.x * x.expanse + x.min
} else {
if (data$PANEL[1] == 1L && group.idx == 1L) { # show only once
message("No 'x' mapping; 'label.x' requires a numeric argument in data units")
}
label.x <- NA_real_
}
}
}
}
if (is.character(label.y)) {
if (label.y[1] == "factor") {
label.x <- z$group
} else {
if (npc.used) {
margin.npc <- 0.05
} else {
# margin set by scale
margin.npc <- 0
}
label.y <- compute_npcy(y = label.y, group = group.idx, v.step = vstep,
margin.npc = margin.npc)
if (!npc.used) {
if ("y" %in% colnames(data)) {
y.expanse <- abs(diff(range(data$y)))
y.min <- min(data$y)
label.y <- label.y * y.expanse + y.min
} else {
if (data$PANEL[1] == 1L && group.idx == 1L) { # show only once
message("No 'y' mapping; 'label.y' requires a numeric argument in data units")
}
label.y <- NA_real_
}
}
}
}
if (npc.used) {
z$npcx <- label.x
z$x <- NA_real_
z$npcy <- label.y
z$y <- NA_real_
} else {
z$x <- label.x
z$npcx <- NA_real_
z$y <- label.y
z$npcy <- NA_real_
}
z$count.label <- sprintf("n=%i", z$count)
z
},
default_aes =
ggplot2::aes(npcx = ggplot2::after_stat(npcx),
npcy = ggplot2::after_stat(npcy),
label = ggplot2::after_stat(count.label),
hjust = "inward",
vjust = "inward"),
required_aes = c("x|y")
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.