# Distribution + interval stat for analytical and sample-based distributions
#
# Author: mjskay
###############################################################################
# compute_limits ----------------------------------------------------------
#' StatSlabinterval$compute_limits()
#' @noRd
compute_limits_slabinterval = function(
self, data, trans, orientation,
p_limits,
trim, adjust,
...
) {
dist = check_one_dist(data$dist)
if (distr_is_missing(dist)) {
return(data.frame(.lower = NA, .upper = NA))
}
if (distr_is_factor_like(dist)) {
# limits on factor-like dists are determined by the scale, which will
# have been set earlier (in layer_slabinterval()), so we don't have to
# do it here
return(data.frame(.lower = NA, .upper = NA))
}
if (distr_is_constant(dist)) {
.median = distr_quantile(dist)(0.5)
return(data.frame(.lower = .median, .upper = .median))
}
if (distr_is_sample(dist)) {
.sample = distr_get_sample(dist)
.weights = distr_get_sample_weights(dist)
return(compute_limits_sample(.sample, trans, trim, adjust, ..., weights = .weights))
}
quantile_fun = distr_quantile(dist)
# if the lower or upper p limit is NA, check to see if the dist has a
# finite limit on the transformed drawing scale, otherwise use .001 or
# .999 as p limit. This ensures that distributions with finite limits
# can be displayed right up to their limits by default.
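# e.g. (illustration, on an identity scale): for dist_gamma(2, 1), quantile_fun(0)
# is 0 (finite), so an NA lower p limit becomes 0; for dist_normal(),
# quantile_fun(0) is -Inf, so it becomes .001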
if (is.na(p_limits[[1]])) {
lower_limit = trans$transform(quantile_fun(0))
p_limits[[1]] = if (all(is.finite(lower_limit))) 0 else .001
}
if (is.na(p_limits[[2]])) {
upper_limit = trans$transform(quantile_fun(1))
p_limits[[2]] = if (all(is.finite(upper_limit))) 1 else .999
}
# need to use min / max here in case of multivariate distributions
# (e.g. distributional::dist_multivariate_normal())
lower_limit = min(quantile_fun(p_limits[[1]]))
upper_limit = max(quantile_fun(p_limits[[2]]))
data.frame(
.lower = lower_limit,
.upper = upper_limit
)
}
#' compute limits of the provided sample
#' @param x sample data on **original** scale
#' @param trans scale transformation
#' @param trim/adjust see stat_slabinterval
#' @noRd
compute_limits_sample = function(x, trans, trim, adjust, ..., density = "bounded", weights = NULL) {
density = match_function(density, "density_")
# determine limits of data based on the density estimator
x = trans$transform(x)
x_range = range(density(x, n = 2, range_only = TRUE, trim = trim, adjust = adjust, weights = weights)$x)
data.frame(
.lower = trans$inverse(x_range[[1]]),
.upper = trans$inverse(x_range[[2]])
)
}
# compute_slab ------------------------------------------------------------
#' StatSlabinterval$compute_slab()
#' @noRd
compute_slab_slabinterval = function(
self, data, scales, trans, input, orientation,
slab_type, limits, n,
adjust, trim, expand, breaks, align, outline_bars,
...
) {
dist = data$dist
# TODO: add support for multivariate distributions
if (distr_is_missing(dist) || distr_is_multivariate(dist)) {
return(data.frame(.input = NA_real_, f = NA_real_, n = NA_integer_))
}
# calculate pdf and cdf
cdf_fun = distr_cdf(dist)
if (distr_is_constant(dist)) {
# for constant distributions, to reliably get the infinite point density
# and a constant line in the CDF, need to manually pick input values
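# e.g. (illustration): for a distribution that is constant at 5, the evaluation
# grid below becomes c(lower, 5, 5, 5, upper) with pdf = c(0, 0, Inf, 0, 0) and
# cdf = c(0, 0, 1, 1, 1), giving a density spike and a CDF step at 5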
quantile_fun = distr_quantile(dist)
input_2 = quantile_fun(0.5)
input_1 = min(input, input_2)
input_3 = max(input, input_2)
input = c(input_1, input_2, input_2, input_2, input_3)
pdf = c(0, 0, Inf, 0, 0)
cdf = c(0, 0, 1, 1, 1)
if (!expand) {
input = input[-c(1, 5)]
pdf = pdf[-c(1, 5)]
cdf = cdf[-c(1, 5)]
}
} else if (!distr_is_factor_like(dist) && distr_is_sample(dist)) {
return(compute_slab_sample(
trans$transform(distr_get_sample(dist)), scales, trans, input,
slab_type = slab_type, limits = limits, n = n,
adjust = adjust, trim = trim, expand = expand,
breaks = breaks, align = align, outline_bars = outline_bars,
...,
weights = distr_get_sample_weights(dist)
))
} else if (trans$name == "identity") {
pdf_fun = distr_pdf(dist)
if (distr_is_discrete(dist)) {
# for discrete distributions, we have to adjust the positions of the x
# values to create bin-like things
input_ = unique(round(input)) # center of bin
input_1 = input_ - 0.5 # first edge of bin
input_2 = input_ + 0.5 # second edge of bin
pdf = pdf_fun(input_)
cdf = cdf_fun(input_)
# we also need the lag of the cdf so we can make it a step function
# at the midpoint of each bin
lag_cdf_input = c(input_[[1]] - 1, input_[-length(input_)])
lag_cdf = cdf_fun(lag_cdf_input)
# need an epsilon value we can use to create ghost values "just above" and
# "just below" the bin edges, so that logical conditions on fills like `x > 1`
# work as expected if 1 is a bin edge.
eps = 2*.Machine$double.eps
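# e.g. (illustration): if 1.5 is the edge between the bins centered at 1 and 2,
# points are evaluated at 1.5 - eps, 1.5, and 1.5 + eps, so a fill condition
# like `x > 1.5` changes value over a negligible (eps-wide) sliver at the edge
# rather than partway across a visible bar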
if (outline_bars) {
# have to return to 0 in between each bar so that bar outlines are drawn
input = as.vector(rbind(input_1, input_1, input_1 + eps, input_, input_, input_2 - eps, input_2, input_2))
pdf = as.vector(rbind(0, pdf, pdf, pdf, pdf, pdf, pdf, 0))
cdf = as.vector(rbind(lag_cdf, lag_cdf, lag_cdf, lag_cdf, cdf, cdf, cdf, cdf))
} else {
# as.vector(rbind(x, y, z, ...)) interleaves vectors x, y, z, ..., giving
# us the bin endpoints and midpoints --- then just need to repeat the same
# value of density for both endpoints of the same bin and to make sure the
# cdf is a step function that steps at the midpoint of the bin
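# e.g. (illustration): with input_ = c(1, 2), the interleaving below gives
# input = c(0.5, 0.5 + eps, 1, 1, 1.5 - eps, 1.5,  1.5, 1.5 + eps, 2, 2, 2.5 - eps, 2.5),
# i.e. six points per bin, and rep(pdf, each = 6) assigns each bin's density
# to all six of its points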
input = as.vector(rbind(input_1, input_1 + eps, input_, input_, input_2 - eps, input_2))
pdf = rep(pdf, each = 6)
cdf = as.vector(rbind(lag_cdf, lag_cdf, lag_cdf, cdf, cdf, cdf))
}
} else {
pdf = pdf_fun(input)
cdf = cdf_fun(input)
}
} else {
# must transform the density according to the scale transformation
pdf_fun = function(x, ...) transform_pdf(distr_pdf(dist), trans$transform(x), trans, g_inverse_at_y = x, ...)
pdf = pdf_fun(input)
cdf = cdf_fun(input)
}
data.frame(
.input = input,
f = get_slab_function(slab_type, list(pdf = pdf, cdf = cdf)),
pdf = pdf,
cdf = cdf,
n = if (distr_is_sample(dist)) length(distr_get_sample(dist)) else Inf
)
}
#' compute slab functions for the provided sample
#' @param x sample data on **transformed** scale
#' @param trans scale transformation
#' @param (others) see stat_slabinterval
#' @importFrom stats density
#' @importFrom graphics hist
#' @noRd
compute_slab_sample = function(
x, scales, trans, input,
slab_type, limits, n,
adjust, trim, expand, breaks, align, outline_bars,
density,
...,
weights = NULL
) {
if (is.integer(x) || inherits(x, "mapped_discrete")) {
# discrete variables are always displayed as histograms
slab_type = "histogram"
breaks = seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE) + 1) - 0.5
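# e.g. (illustration): for x in 1:3 this gives breaks = c(0.5, 1.5, 2.5, 3.5),
# i.e. unit-width bins centered on the integer values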
}
if (slab_type == "histogram") density = "histogram"
density = match_function(density, prefix = "density_")
# calculate pdf and cdf
d = density(
x, n = n, adjust = adjust, trim = trim,
breaks = breaks, align = align, outline_bars = outline_bars,
weights = weights
)
slab_df = data.frame(
.input = trans$inverse(d$x),
pdf = d$y,
cdf = d$cdf %||% weighted_ecdf(x, weights = weights)(d$x)
)
# extend x values to the range of the plot. To do that we have to include
# x values requested from the original `input` if they are outside the
# range of the slab
expand = rep_len(expand, 2L)
if (expand[[1]]) {
input_below_slab = input[input < min(slab_df$.input) - .Machine$double.eps]
if (length(input_below_slab) > 0) {
slab_df = rbind(data.frame(
.input = input_below_slab,
pdf = 0,
cdf = 0
), slab_df)
}
}
if (expand[[2]]) {
input_above_slab = input[input > max(slab_df$.input) + .Machine$double.eps]
if (length(input_above_slab) > 0) {
slab_df = rbind(slab_df, data.frame(
.input = input_above_slab,
pdf = 0,
cdf = 1
))
}
}
slab_df[["f"]] = get_slab_function(slab_type, slab_df)
slab_df$n = length(x)
slab_df
}
# compute_interval --------------------------------------------------------
#' StatSlabinterval$compute_interval()
#' @noRd
compute_interval_slabinterval = function(
self, data, trans,
orientation, point_interval,
.width, na.rm,
...
) {
if (is.null(point_interval)) return(data.frame())
dist = data$dist
if (distr_is_missing(dist)) {
return(data.frame(.value = NA_real_, .lower = NA_real_, .upper = NA_real_, .width = .width))
}
distr_point_interval(dist, point_interval, trans = trans, .width = .width, na.rm = na.rm)
}
# stat_slabinterval --------------------------------------------------
#' Slab + interval plots for sample data and analytical distributions (ggplot stat)
#'
#' "Meta" stat for computing distribution functions (densities or CDFs) + intervals for use with
#' [geom_slabinterval()]. Useful for creating eye plots, half-eye plots, CCDF bar plots,
#' gradient plots, histograms, and more. Sample data can be supplied to the `x` and `y`
#' aesthetics or analytical distributions (in a variety of formats) can be supplied to the
#' `xdist` and `ydist` aesthetics.
#' See **Details**.
#'
#' A highly configurable stat for generating a variety of plots that combine a "slab"
#' that describes a distribution plus a point summary and any number of intervals.
#' Several "shortcut" stats are provided
#' which combine multiple options to create useful geoms, particularly *eye plots*
#' (a violin plot of density plus interval), *half-eye plots* (a density plot plus interval),
#' *CCDF bar plots* (a complementary CDF plus interval), and *gradient plots*
#' (a density encoded in color alpha plus interval).
#'
#' The shortcut stats include:
#'
#' - [stat_eye()]: Eye plots (violin + interval)
#' - [stat_halfeye()]: Half-eye plots (density + interval)
#' - [stat_ccdfinterval()]: CCDF bar plots (CCDF + interval)
#' - [stat_cdfinterval()]: CDF bar plots (CDF + interval)
#' - [stat_gradientinterval()]: Density gradient + interval plots
#' - [stat_slab()]: Density plots
#' - [stat_histinterval()]: Histogram + interval plots
#' - [stat_pointinterval()]: Point + interval plots
#' - [stat_interval()]: Interval plots
#'
#' @template details-x-y-xdist-ydist
#' @eval rd_slabinterval_computed_variables(stat = StatSlabinterval)
#' @eval rd_slabinterval_aesthetics(stat = StatSlabinterval)
#' @eval rd_layer_params("slabinterval", stat = StatSlabinterval, as_dots = TRUE)
#'
#' @inheritParams geom_slabinterval
#' @inheritParams density_histogram
#' @param geom Use to override the default connection between
#' [stat_slabinterval()] and [geom_slabinterval()]
#' @param slab_type (deprecated) The type of slab function to calculate: probability density (or mass) function
#' (`"pdf"`), cumulative distribution function (`"cdf"`), or complementary CDF (`"ccdf"`). Instead of using
#' `slab_type` to change `f` and then mapping `f` onto an aesthetic, it is now recommended to simply map the
#' corresponding computed variable (e.g. `pdf`, `cdf`, or `1 - cdf`) directly onto the desired aesthetic.
#' @param p_limits Probability limits (as a vector of size 2) used to determine the lower and upper
#' limits of *theoretical* distributions (distributions from *samples* ignore this parameter and determine
#' their limits based on the limits of the sample). E.g., if this is `c(.001, .999)`, then a slab is drawn
#' for the distribution from the quantile at `p = .001` to the quantile at `p = .999`. If the lower
#' (respectively upper) limit is `NA`, then the lower (upper) limit will be the minimum (maximum) of the
#' distribution's support if it is finite, and `0.001` (`0.999`) if it is not finite. E.g., if
#' `p_limits` is `c(NA, NA)`, on a gamma distribution the effective value of `p_limits` would be
#' `c(0, .999)` since the gamma distribution is defined on `(0, Inf)`; whereas on a normal distribution
#' it would be equivalent to `c(.001, .999)` since the normal distribution is defined on `(-Inf, Inf)`.
#' @param outline_bars For sample data (if `density` is `"histogram"`) and for discrete analytical
#' distributions (whose slabs are drawn as histograms), determines
#' if outlines in between the bars are drawn when the `slab_color` aesthetic is used. If `FALSE`
#' (the default), the outline is drawn only along the tops of the bars; if `TRUE`, outlines in between
#' bars are also drawn. See [density_histogram()].
#' @param density Density estimator for sample data. One of:
#' - A function which takes a numeric vector and returns a list with elements
#' `x` (giving grid points for the density estimator) and `y` (the
#' corresponding densities). \pkg{ggdist} provides a family of functions
#' following this format, including [density_unbounded()] and
#' [density_bounded()]. This format is also compatible with [stats::density()].
#' - A string giving the suffix of a function name that starts with `"density_"`;
#' e.g. `"bounded"` for [density_bounded()], `"unbounded"` for [density_unbounded()],
#' or `"histogram"` for [density_histogram()].
#' Defaults to `"bounded"`, i.e. [density_bounded()], which estimates the bounds from
#' the data and then uses a bounded density estimator based on the reflection method.
#' @param adjust Passed to `density`: the bandwidth for the density estimator for sample data
#' is adjusted by multiplying it by this value. See e.g. [density_bounded()] for more information.
#' Default (`waiver()`) defers to the default of the density estimator, which is usually `1`.
#' @param trim For sample data, should the density estimate be trimmed to the range of the
#' data? Passed on to the density estimator; see the `density` parameter. Default `TRUE`.
#' @param expand For sample data, should the slab be expanded to the limits of the scale? Default `FALSE`.
#' Can be length two to control expansion to the lower and upper limit respectively.
#' @param limits Manually-specified limits for the slab, as a vector of length two. These limits are combined with those
#' computed based on `p_limits` as well as the limits defined by the scales of the plot to determine the
#' limits used to draw the slab functions: these limits specify the maximal limits; i.e., if specified, the limits
#' will not be wider than these (but may be narrower). Use `NA` to leave a limit alone; e.g.
#' `limits = c(0, NA)` will ensure that the lower limit does not go below 0, but let the upper limit
#' be determined by either `p_limits` or the scale settings.
#' @param n Number of points at which to evaluate the function that defines the slab.
#' @param point_interval A function from the [point_interval()] family (e.g., `median_qi`,
#' `mean_qi`, `mode_hdi`, etc), or a string giving the name of a function from that family
#' (e.g., `"median_qi"`, `"mean_qi"`, `"mode_hdi"`, etc; if a string, the caller's environment is searched
#' for the function, followed by the \pkg{ggdist} environment). This function determines the point summary
#' (typically mean, median, or mode) and interval type (quantile interval, `qi`;
#' highest-density interval, `hdi`; or highest-density continuous interval, `hdci`). Output will
#' be converted to the appropriate `x`- or `y`-based aesthetics depending on the value of `orientation`.
#' See the [point_interval()] family of functions for more information.
#' @param .width The `.width` argument passed to `point_interval`: a vector of probabilities to use
#' that determine the widths of the resulting intervals. If multiple probabilities are provided,
#' multiple intervals per group are generated, each with a different probability interval (and
#' value of the corresponding `.width` and `level` generated variables).
#' @param show.legend Should this layer be included in the legends? Default is `c(size = FALSE)`, unlike most geoms,
#' to match its common use cases. `FALSE` hides all legends, `TRUE` shows all legends, and `NA` shows only
#' those that are mapped (the default for most geoms).
#' @return A [ggplot2::Stat] representing a slab or combined slab+interval geometry which can
#' be added to a [ggplot()] object.
#' @seealso See [geom_slabinterval()] for more information on the geom these stats
#' use by default and some of the options it has.
#' See `vignette("slabinterval")` for a variety of examples of use.
#' @examples
#'
#' library(dplyr)
#' library(ggplot2)
#' library(distributional)
#'
#' theme_set(theme_ggdist())
#'
#'
#' # EXAMPLES ON SAMPLE DATA
#' set.seed(1234)
#' df = data.frame(
#' group = c("a", "b", "c", "c", "c"),
#' value = rnorm(2500, mean = c(5, 7, 9, 9, 9), sd = c(1, 1.5, 1, 1, 1))
#' )
#'
#' # here are vertical eyes:
#' df %>%
#' ggplot(aes(x = group, y = value)) +
#' stat_eye()
#'
#' # note the sample size is not automatically incorporated into the
#' # area of the densities in case one wishes to plot densities against
#' # a reference (e.g. a prior distribution).
#' # But you may wish to account for sample size if using these geoms
#' # for something other than visualizing posteriors, in which case
#' # you can use after_stat(pdf*n):
#' df %>%
#' ggplot(aes(x = group, y = value)) +
#' stat_eye(aes(thickness = after_stat(pdf*n)))
#'
#'
#' # EXAMPLES ON ANALYTICAL DISTRIBUTIONS
#'
#' dist_df = tribble(
#' ~group, ~subgroup, ~mean, ~sd,
#' "a", "h", 5, 1,
#' "b", "h", 7, 1.5,
#' "c", "h", 8, 1,
#' "c", "i", 9, 1,
#' "c", "j", 7, 1
#' )
#'
#' # Using functions from the distributional package (like dist_normal()) with the
#' # dist aesthetic can lead to more compact/expressive specifications
#'
#' dist_df %>%
#' ggplot(aes(x = group, ydist = dist_normal(mean, sd), fill = subgroup)) +
#' stat_eye(position = "dodge")
#'
#' # using the old character vector + args approach
#' dist_df %>%
#' ggplot(aes(x = group, dist = "norm", arg1 = mean, arg2 = sd, fill = subgroup)) +
#' stat_eye(position = "dodge")
#'
#' # the stat_slabinterval family applies a Jacobian adjustment to densities
#' # when plotting on transformed scales in order to plot them correctly.
#' # It determines the Jacobian using symbolic differentiation if possible,
#' # using stats::D(). If symbolic differentiation fails, it falls back
#' # to numericDeriv(), which is less reliable; therefore, it is
#' # advisable to use scale transformation functions that are defined in
#' # terms of basic math functions so that their derivatives can be
#' # determined analytically (most of the transformation functions in the
#' # scales package currently have this property).
#' # For example, here is a log-Normal distribution plotted on the log
#' # scale, where it will appear Normal:
#' data.frame(dist = "lnorm", logmean = log(10), logsd = 2*log(10)) %>%
#' ggplot(aes(y = 1, dist = dist, arg1 = logmean, arg2 = logsd)) +
#' stat_halfeye() +
#' scale_x_log10(breaks = 10^seq(-5,7, by = 2))
#'
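#' # (a sketch) p_limits controls how far into the tails the slab of a
#' # theoretical distribution is drawn; an NA limit uses the corresponding
#' # bound of the distribution's support if it is finite. Here the Normal
#' # slab is extended out to the 0.0001 and 0.9999 quantiles:
#' data.frame(mean = 0, sd = 1) %>%
#'   ggplot(aes(y = 0, xdist = dist_normal(mean, sd))) +
#'   stat_halfeye(p_limits = c(.0001, .9999))
#'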
#' # see vignette("slabinterval") for many more examples.
#'
#' @name stat_slabinterval
#' @importFrom distributional dist_wrap dist_missing dist_sample is_distribution
NULL
#' @rdname ggdist-ggproto
#' @format NULL
#' @usage NULL
#' @export
StatSlabinterval = ggproto("StatSlabinterval", AbstractStatSlabinterval,
aes_docs = defaults(list(
x = 'x position of the geometry (when orientation = `"vertical"`); or sample data to be summarized
(when `orientation = "horizontal"` with sample data).',
y = 'y position of the geometry (when orientation = `"horizontal"`); or sample data to be summarized
(when `orientation = "vertical"` with sample data).',
weight = 'When using samples (i.e. the `x` and `y` aesthetics, not `xdist` or `ydist`), optional
weights to be applied to each draw.',
xdist = 'When using analytical distributions, distribution to map on the x axis: a \\pkg{distributional}
object (e.g. [dist_normal()]) or a [posterior::rvar()] object.',
ydist = 'When using analytical distributions, distribution to map on the y axis: a \\pkg{distributional}
object (e.g. [dist_normal()]) or a [posterior::rvar()] object.',
dist = 'When using analytical distributions, a name of a distribution (e.g. `"norm"`), a
\\pkg{distributional} object (e.g. [dist_normal()]), or a [posterior::rvar()] object. See **Details**.',
args = 'Distribution arguments (`args` or `arg1`, ... `arg9`). See **Details**.'
), AbstractStatSlabinterval$aes_docs),
default_aes = defaults(aes(
xdist = NULL,
ydist = NULL,
dist = NULL,
args = NULL,
arg1 = NULL,
arg2 = NULL,
arg3 = NULL,
arg4 = NULL,
arg5 = NULL,
arg6 = NULL,
arg7 = NULL,
arg8 = NULL,
arg9 = NULL,
weight = NULL
), AbstractStatSlabinterval$default_aes),
# internal parameter used to determine if the stat re-groups
# data by the combination of input group + dist + args. For
# most use cases this should be TRUE, but for some corner
# cases (like lineribbons), this needs to be FALSE or weird
# things will happen.
group_by_dist = TRUE,
default_params = defaults(list(
p_limits = c(NA, NA),
density = "bounded",
adjust = waiver(),
trim = TRUE,
expand = FALSE,
breaks = waiver(),
align = "none",
outline_bars = FALSE,
point_interval = "median_qi",
# deprecated parameters
slab_type = NULL # deprecated, set by default_slab_type (below)
), AbstractStatSlabinterval$default_params),
layer_function = "layer_slabinterval",
# overrides slab_type in setup_params() when slab_type is NULL (the default).
# This allows us to detect if the user sets slab_type (which is deprecated)
# and throw a warning.
default_slab_type = "pdf",
# orientation auto-detection here is different from base AbstractStatSlabinterval
# (main_is_orthogonal needs to be FALSE)
orientation_options = defaults(list(
secondary_is_dist = TRUE
), AbstractStatSlabinterval$orientation_options),
setup_data = function(self, data, params) {
data = ggproto_parent(AbstractStatSlabinterval, self)$setup_data(data, params)
define_orientation_variables(params$orientation)
# check for dist-like objects in x / y axis: these are likely user errors
# caused by assigning a distribution to x / y instead of xdist / ydist
dist_like_cols = c("x", "y")[map_lgl_(c("x", "y"), function(col) is_dist_like(data[[col]]))]
if (length(dist_like_cols) > 0) {
s = if (length(dist_like_cols) > 1) "s"
stop0(
"Cannot use distribution or rvar objects with the ", paste0("`", dist_like_cols, "`", collapse = " or "),
" aesthetic", s, ". Use the ", paste0("`", dist_like_cols, "dist`", collapse = " or "),
" aesthetic", s, " instead."
)
}
# pull out the x (secondary) dist axis into the dist aesthetic
if (!is.null(data[[xdist]])) {
#TODO: reverse this: move dist into xdist and use xdist elsewhere
data$dist = data[[xdist]]
}
if (is.character(data$dist) || is.factor(data$dist)) {
# ignore unknown distributions (with a warning)
data$dist = check_dist_name(data$dist)
# convert character/factor dist aesthetic into distributional objects
arg_cols = names(data)[startsWith(names(data), "arg")]
data$dist = pmap_(data[, c("dist", arg_cols)], function(dist, ...) {
if (is.na(dist)) {
dist_missing()
} else {
.args = args_from_aes(...)
do.call(dist_wrap, c(list(dist), .args))
}
})
}
if (!is.null(data$dist) && self$group_by_dist) {
# Need to group by rows in the data frame to draw correctly, as
# each output slab will need to be in its own group.
# First check if we are grouped by rows already (in which case leave it)
if (anyDuplicated(data$group)) {
# need to make new groups that ensure every row is unique but which
# preserve old group order at the top level
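# e.g. (illustration): groups c(1, 1, 2) over three rows become c(1, 2, 3),
# so every row gets a unique group, but rows from old group 1 still sort
# before rows from old group 2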
data$group = as.numeric(interaction(
data$group, seq_len(nrow(data)), drop = TRUE, lex.order = TRUE
))
}
}
data
},
setup_params = function(self, data, params) {
params = ggproto_parent(AbstractStatSlabinterval, self)$setup_params(data, params)
# override deprecated slab_type with default_slab_type
if (is.null(params$slab_type)) {
params$slab_type = self$default_slab_type
} else {
cli_warn(c(
'The {.arg slab_type} parameter for {.pkg ggdist} stats is deprecated.',
'i' = 'Instead of using {.arg slab_type}, use {.fun ggplot2::after_stat} to
map the desired computed variable, e.g. {.code pdf} or {.code cdf}, onto
an aesthetic, e.g. {.code aes(thickness = after_stat(pdf))}. Specifically:',
'*' = 'To replace {.code slab_type = "pdf"}, map {.code after_stat(pdf)} onto an aesthetic.',
'*' = 'To replace {.code slab_type = "cdf"}, map {.code after_stat(cdf)} onto an aesthetic.',
'*' = 'To replace {.code slab_type = "ccdf"}, map {.code after_stat(1 - cdf)} onto an aesthetic.',
'*' = 'To replace {.code slab_type = "histogram"}, map {.code after_stat(pdf)} onto an aesthetic and
pass {.code density = "histogram"} to the stat.',
'i' = 'For more information, see the {.emph Computed Variables} section of {.fun ggdist::stat_slabinterval}.'
))
}
params
},
compute_panel = function(self, data, scales,
orientation,
na.rm,
...
) {
define_orientation_variables(orientation)
# convert x/y secondary positional aesthetic into dist_sample
# must do this here instead of in setup_data so that we can invert
# the scale transformation --- this ensures that data supplied via dist_sample
# and data supplied via the x/y positional aesthetics can all be treated
# as being on the untransformed scale in any subsequent code
if (is.null(data$dist) && is.numeric(data[[x]])) {
trans = scales[[x]]$trans %||% scales::identity_trans()
data = remove_missing(data, na.rm, x, name = "stat_slabinterval")
if (inherits(data[[x]], "mapped_discrete") && is_integerish(data[[x]])) {
# integer-like mapped discrete data needs to be converted to integer here
# so that it will be treated as a discrete distribution later
# (e.g. by distr_is_discrete())
data[[x]] = as.integer(data[[x]])
}
# dist aesthetic is not provided but x aesthetic is, and x is not a dist,
# so we need to wrap it up as a weighted dist_sample
data = summarise_by(data, c("PANEL", y, "group"), function(d) {
data.frame(dist = .dist_weighted_sample(list(trans$inverse(d[[x]])), list(d[["weight"]])))
})
data[[x]] = median(data$dist)
}
# handle logical distributions: logical distributions don't play well with
# numeric scales, so we use a bit of a hack and convert them to integers
# by adding 0L to them
if (is_distribution(data$dist)) {
is_logical = map_lgl_(data$dist, distr_is_logical)
data$dist[is_logical] = data$dist[is_logical] + 0L
} else if (inherits(data$dist, "rvar") && distr_is_logical(data$dist)) {
data$dist = data$dist + 0L
}
# handle rvar factors / categorical dists: our modified version of Layer$compute_aesthetics will
# already have ensured the scale is discrete with appropriate limits, we
# just need to adjust the rvar (or wrap the distribution) to have the same levels as the scale limits
# (in case levels have been re-ordered / added / removed)
if (inherits(data$dist, "rvar_factor")) {
data$dist = posterior::rvar_factor(
data$dist,
levels = scales[[x]]$get_limits(),
ordered = inherits(data$dist, "rvar_ordered")
)
} else if (distr_is_factor_like(data$dist)) {
new_levels = scales[[x]]$get_limits()
data$dist = .dist_wrapped_categorical(
data$dist,
new_levels = new_levels
)
}
ggproto_parent(AbstractStatSlabinterval, self)$compute_panel(
data, scales,
orientation = orientation,
na.rm = na.rm,
...
)
},
# workaround (#84)
compute_limits = function(self, ...) compute_limits_slabinterval(self, ...),
compute_slab = function(self, ...) compute_slab_slabinterval(self, ...),
compute_interval = function(self, ...) compute_interval_slabinterval(self, ...)
)
#' @export
#' @rdname stat_slabinterval
stat_slabinterval = make_stat(StatSlabinterval, geom = "slabinterval")
# layer for slabinterval --------------------------------------------------
#' Alternative to ggplot2::layer() which adds necessary hooks to the Layer for
#' stat_slabinterval. See the layer_function property of StatSlabinterval
#' @noRd
layer_slabinterval = function(...) {
l = layer(...)
ggproto(NULL, l,
compute_aesthetics = function(self, data, plot) {
data = ggproto_parent(l, self)$compute_aesthetics(data, plot)
# factor-like dists: must be handled in a two-step process: first, here
# we have to ensure the x/y scale is set up correctly as a discrete scale
# with levels from the distribution. Then, in compute_panel, we will convert
# factor-like dists and rvars to integers using the x/y scale.
for (xy in c("x", "y")) {
dist = paste0(xy, "dist")
if (distr_is_factor_like(data[[dist]])) {
# ensure a discrete scale has been added to the plot with appropriate limits
scale = plot$scales$get_scales(xy)
if (is.null(scale)) {
scale = utils::getFromNamespace(paste0("scale_", xy, "_discrete"), "ggplot2")()
plot$scales$add(scale)
}
scale$limits = scale$limits %||% distr_levels(data[[dist]])
scale$train(posterior::draws_of(data[[dist]]))
}
}
data
}
)
}
# shortcut stats ----------------------------------------------------------
StatHalfeye = StatSlabinterval
#' @eval rd_slabinterval_shortcut_stat(
#' "halfeye", "half-eye (density + interval)", geom_name = "slabinterval", describe = FALSE
#' )
#' @description
#' Equivalent to [stat_slabinterval()], whose default settings create half-eye (density + interval) plots.
#' @export
stat_halfeye = stat_slabinterval
StatEye = ggproto("StatEye", StatSlabinterval,
default_aes = defaults(aes(
side = after_stat("both")
), StatSlabinterval$default_aes)
)
#' @eval rd_slabinterval_shortcut_stat("eye", "eye (violin + interval)", geom_name = "slabinterval")
#' @export
stat_eye = make_stat(StatEye, geom = "slabinterval")
StatCcdfinterval = ggproto("StatCcdfinterval", StatSlabinterval,
default_aes = defaults(aes(
thickness = after_stat(thickness(1 - cdf, 0, 1)),
justification = after_stat(0.5),
side = after_stat("topleft")
), StatSlabinterval$default_aes),
default_params = defaults(list(
normalize = "none",
expand = TRUE
), StatSlabinterval$default_params),
default_slab_type = "ccdf"
)
#' @eval rd_slabinterval_shortcut_stat(
#' "ccdfinterval", "CCDF bar", geom_name = "slabinterval", example_layers = "expand_limits(x = 0)"
#' )
#' @export
stat_ccdfinterval = make_stat(StatCcdfinterval, geom = "slabinterval")
StatCdfinterval = ggproto("StatCdfinterval", StatCcdfinterval,
default_aes = defaults(aes(
thickness = after_stat(thickness(cdf, 0, 1))
), StatCcdfinterval$default_aes),
default_slab_type = "cdf"
)
#' @eval rd_slabinterval_shortcut_stat("cdfinterval", "CDF bar", geom_name = "slabinterval")
#' @export
stat_cdfinterval = make_stat(StatCdfinterval, geom = "slabinterval")
StatGradientinterval = ggproto("StatGradientinterval", StatSlabinterval,
default_aes = defaults(aes(
justification = after_stat(0.5),
thickness = after_stat(thickness(1)),
slab_alpha = after_stat(f)
), StatSlabinterval$default_aes),
default_params = defaults(list(
fill_type = "auto"
), StatSlabinterval$default_params),
layer_args = defaults(list(
show.legend = c(size = FALSE, slab_alpha = FALSE)
), StatSlabinterval$layer_args)
)
#' @eval rd_slabinterval_shortcut_stat("gradientinterval", "gradient + interval", geom_name = "slabinterval")
#' @description
#' If your graphics device supports it, it is recommended to use this stat
#' with `fill_type = "gradient"` (see the description of that parameter). On R >= 4.2,
#' support for `fill_type = "gradient"` should be auto-detected based on the
#' graphics device you are using.
#' @export
stat_gradientinterval = make_stat(StatGradientinterval, geom = "slabinterval")
StatHistinterval = ggproto("StatHistinterval", StatSlabinterval,
default_params = defaults(list(
density = "histogram"
), StatSlabinterval$default_params),
default_slab_type = "histogram"
)
#' @eval rd_slabinterval_shortcut_stat("histinterval", "histogram + interval", geom_name = "slabinterval")
#' @export
stat_histinterval = make_stat(StatHistinterval, geom = "slabinterval")
StatSlab = ggproto("StatSlab", StatSlabinterval,
default_params = defaults(list(
show_point = FALSE,
show_interval = FALSE
), StatSlabinterval$default_params),
layer_args = defaults(list(
show.legend = NA
), StatSlabinterval$layer_args),
hidden_params = union(c(
"show_slab", "show_point", "show_interval",
"point_interval", ".width"
), StatSlabinterval$hidden_params)
)
StatSlab$default_aes$size = NULL
#' @eval rd_slabinterval_shortcut_stat("slab", "slab (ridge)", geom_name = "slab")
#' @examples
#'
#' # RIDGE PLOTS
#' # "ridge" plots can be created by expanding the slabs to the limits of the plot
#' # (expand = TRUE), allowing the density estimator to be nonzero outside the
#' # limits of the data (trim = FALSE), and increasing the height of the slabs.
#' data.frame(
#' group = letters[1:3],
#' value = rnorm(3000, 3:1)
#' ) %>%
#' ggplot(aes(y = group, x = value)) +
#' stat_slab(color = "black", expand = TRUE, trim = FALSE, height = 2)
#' @export
stat_slab = make_stat(StatSlab, geom = "slab")
# helpers -----------------------------------------------------------------
#' translate distribution arguments of the form `arg1` ... `arg9` (or from a
#' list column, `args`) into a single list of arguments
#' @param args list-column of arguments
#' @param ... other columns, including `arg1` ... `arg9`
#' @noRd
args_from_aes = function(args = list(), ...) {
args_names = names(args)
if (length(args_names) > 0) {
named_args_i = nzchar(args_names)
named_args = args[named_args_i]
unnamed_args = args[!named_args_i]
} else {
named_args = list()
unnamed_args = args
}
dot_args = list(...)
for (i in 1:9) {
arg_name = paste0("arg", i)
if (arg_name %in% names(dot_args)) {
unnamed_args[[i]] = dot_args[[arg_name]]
}
}
c(unnamed_args, named_args)
}
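# e.g. (illustration):
#   args_from_aes(arg1 = 0, arg2 = 1)            # => list(0, 1)
#   args_from_aes(args = list(sd = 2), arg1 = 0) # => list(0, sd = 2)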
#' Given the slab_type parameter and a data frame or list containing `pdf` and
#' `cdf` computed variables, return the value of the slab function (the `f`
#' computed variable)
#' @param slab_type string: `"pdf"`, `"cdf"`, `"histogram"`, or `"ccdf"`. If
#' the `slab_type` is not valid, an appropriate error will be raised.
#' @param slab_df a list or data frame containing elements `"pdf"` and `"cdf"`.
#' @noRd
get_slab_function = function(slab_type, slab_df) {
switch(slab_type,
histogram = ,
pdf = slab_df$pdf,
cdf = slab_df$cdf,
ccdf = 1 - slab_df$cdf,
stop0("Unknown `slab_type`: ", deparse0(slab_type), '. Must be "histogram", "pdf", "cdf", or "ccdf"')
)
}
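# e.g. (illustration): get_slab_function("ccdf", list(pdf = c(1, 2), cdf = c(.1, .5)))
# returns c(.9, .5)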