Nothing
#' Find a percentage of a distribution
#'
#' Given a distribution, find which values lie in the upper, lower, or middle proportion of the
#' distribution. Useful when you want to do something like shade in the middle 95% of a plot. This
#' is a greedy operation, meaning that if the cutoff point is between two whole numbers the
#' specified region will suck up the extra space. For example, the requesting the upper 30% of the
#' `[1 2 3 4]` will return `[FALSE FALSE TRUE TRUE]` because the 30% was greedy.
#'
#' Note that `NA` values are ignored, i.e. they will always return `FALSE`.
#'
#' @param x The distribution of values to check.
#' @param prop The proportion of values to find.
#' @param greedy Whether the function should be greedy, as per the description above.
#'
#' @return A logical vector indicating which values are in the specified region.
#'
#' @rdname distribution_parts
#' @export
#' @examples
#'
#' upper(1:10, .1)
#' lower(1:10, .2)
#' middle(1:10, .5)
#' tails(1:10, .5)
#'
#' sampling_distribution <- do(1000) * mean(rnorm(100, 5, 10))
#' sampling_distribution %>%
#' gf_histogram(~mean, data = sampling_distribution, fill = ~ middle(mean, .68)) %>%
#' gf_refine(scale_fill_manual(values = c("blue", "coral")))
middle <- function(x, prop = .95, greedy = TRUE) {
tail_prop <- (1 - prop) / 2
in_upper <- upper(x, tail_prop, !greedy)
in_lower <- lower(x, tail_prop, !greedy)
!in_upper & !in_lower
}
#' @rdname distribution_parts
#' @export
tails <- function(x, prop = .95, greedy = TRUE) {
!middle(x, prop, greedy)
}
#' @description
#' `r lifecycle::badge("experimental")`
#'
#' `outer()` marks values in both outer tails of a distribution. It is the
#' complement of [middle()]: `outer(x, prop)` is equivalent to
#' `tails(x, 1 - prop)`.
#'
#' @param x The distribution of values to check.
#' @param prop The total proportion in both tails combined, must be in (0, 1).
#'
#' @rdname distribution_parts
#' @export
outer <- function(x, prop) {
lifecycle::signal_stage("experimental", "outer()")
if (!is.numeric(prop) || length(prop) != 1 || prop <= 0 || prop >= 1) {
abort("`prop` must be a single number between 0 and 1 (exclusive).")
}
tails(x, 1 - prop)
}
#' @rdname distribution_parts
#' @export
lower <- function(x, prop = .025, greedy = TRUE) {
values <- data.frame(x = x, original_pos = seq_along(x))
values <- values[order(x), , drop = FALSE]
values$in_zone <- seq_along(x) <= tail_size(x, prop, greedy)
values$in_zone[is.na(values$x)] <- rlang::na_lgl
values[order(values$original_pos), "in_zone", drop = TRUE]
}
#' @rdname distribution_parts
#' @export
upper <- function(x, prop = .025, greedy = TRUE) {
values <- data.frame(x = x, original_pos = seq_along(x))
values <- values[order(x, decreasing = TRUE), , drop = FALSE]
values$in_zone <- seq_along(x) <= tail_size(x, prop, greedy)
values$in_zone[is.na(values$x)] <- rlang::na_lgl
values[order(values$original_pos), "in_zone", drop = TRUE]
}
#' Calculate the number of values in the tail of a distribution
#'
#' @param x The distribution of values to check.
#' @param prop The proportion of values to find.
#' @param greedy Whether the function should be greedy, as per the description above.
#'
#' @return The number of values in the tail of the distribution.
#'
#' @noRd
tail_size <- function(x, prop, greedy) {
na_rm <- stats::na.omit(x)
tail_unbiased <- length(na_rm) * prop
if (greedy) ceiling(tail_unbiased) else floor(tail_unbiased)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.