#' Generalized mean
#' @export
#' @description Create a function to calculate a generalized mean.
#' @param p Exponent of the generalized mean.
#' @return A function.
#' @details Returns a function to calculate the generalized mean with exponent \code{p} (i.e., the weighted mean of \code{x} to the power of \code{p}, all raised to the power of 1/\code{p}).
#' The return value is a function of \code{\link[stats]{weighted.mean}}, unless \code{p} = -Inf or \code{p} = Inf, in which case it is a function of min/max.
#' @seealso See \code{\link[ppd]{geomean}}.
generalized_mean <- function(p) {
# p should be a length 1 number
stopifnot(length(p) == 1L, is.numeric(p), !is.na(p))
p <- as.numeric(p)
# assign appropriate generalized mean function based on p
genmean <- if (identical(p, 0)) { # geomean if p = 0
function(x, w, na.rm) exp(stats::weighted.mean(log(x), w, na.rm = na.rm))
} else if (identical(p, -Inf)) { # min if p = -inf
function(x, w, na.rm) min(x, na.rm = na.rm)
} else if (identical(p, Inf)) { # max if p = inf
function(x, w, na.rm) max(x, na.rm = na.rm)
} else if (identical(abs(p), 1)) {
if (identical(p, 1)) { # arithmetic mean if p = 1
function(x, w, na.rm) stats::weighted.mean(x, w, na.rm = na.rm)
} else { # harmonic mean if p = -1
function(x, w, na.rm) 1 / stats::weighted.mean(1 / x, w, na.rm = na.rm)
}
} else if (p < 0) { # generalized mean otherwise
function(x, w, na.rm) (stats::weighted.mean((1 / x^abs(p)), w, na.rm = na.rm))^(1 / p)
} else {
function(x, w, na.rm) (stats::weighted.mean(x^p, w, na.rm = na.rm))^(1 / p)
}
# return value
function(x, w, na.rm = FALSE) {
stopifnot(
is.numeric(x) || is.logical(x) || is.complex(x),
missing(w) || is.numeric(w),
missing(w) || length(x) == length(w),
length(na.rm) == 1L,
is.logical(na.rm)
)
if (length(x) == 0L || (!missing(w) && all(w == 0))) return(NaN)
genmean(x, w, na.rm)
}
}
#' Weighted geometric/harmonic mean
#' @export
#' @description Calculate a weighted geometric or harmonic mean.
#' @param x A numeric vector.
#' @param w An optional vector of (numeric) weights, the same length as \code{x}.
#' @param na.rm Should missing values be removed?
#' @return A numeric value.
#' @details The underlying calculation is done with \code{\link[stats]{weighted.mean}}.
#' This means that weights are handled in the same way as \code{\link[stats]{weighted.mean}}.
#'
#' The implementation and performance is similar to the \code{geometric.mean} and \code{harmonic.mean} functions in the \code{psych} package, and the \code{geoMean} function in the \code{EnvStats} package, with the exception that weights are allowed.
#' @seealso See \code{\link[ppd]{generalized_mean}} for making other types of generalized means.
#' @examples
#' # Geometric mean
#' geomean(2:3)
#'
#' # Same as manual calculation
#' sqrt(6)
#'
#' # Using prod to calculate geomean manually can give misleading results
#' prod(1:1000)^(1/1000)
#' geomean(1:1000)
#'
#' # Geomean is usually faster anyways
#' x <- runif(1000000)
#' system.time(geomean(x))
#' system.time(prod(x^(1/length(x))))
#'
#' # Harmonic mean
#' harmean(2:3)
#'
#' # Same as manual calculation
#' (sum((2:3)^(-1)) / 2)^(-1)
geomean <- generalized_mean(0)
#' @rdname geomean
#' @export
harmean <- generalized_mean(-1)
#' Weighted mode
#' @description Calculate the weighted mode of a vector.
#' @author Steve Martin
#' @param x An atomic vector.
#' @param w An optional vector of weights, the same length as \code{x}. Calculates an unweighted mode if no weights are given.
#' @param unique Should multiple modes result in NA?
#' @param na.rm Should missing values in \code{x} be removed?
#' @return A vector of the same type as \code{x}.
#' @details Returns the values in \code{x} with the largest weight. Leaving \code{w} blank, or giving a vector of equal weights, returns the most frequently occurring values in \code{x}.
#' This is equivalent to choosing a value m that minimizes the weighted mean of the hamming distance between the elements of \code{x} and m.
#'
#' The mode need not be unique, and NA is returned if \code{unique} is TRUE and there are multiple modes.
#' @examples
#' \dontrun{
#' # Unique mode
#' weighted_mode(c(1, 3, 2, 1))
#' weighted_mode(c(TRUE, FALSE, TRUE, FALSE), 1:4)
#' # Two modes
#' weighted_mode(c('a', 'b', 'c', 'b', 'c'))
#' }
weighted_mode <- function(x, w, unique = TRUE, na.rm = FALSE) {
# Check user input
if (!is.atomic(x)) stop('x must be an atomic vector.')
if (!missing(w) && length(x) != length(w)) stop('x and w must be the same length.')
if (length(x) == 0L) return(x[0][NA])
unique <- isTRUE(unique)
# Deal with NAs
if (anyNA(x)) {
if (isTRUE(na.rm)) {
na <- !is.na(x)
x <- x[na]
if (!missing(w)) {
w <- w[na]
}
} else {
return(x[0][NA])
}
}
# Unweighted mode
if (missing(w)) {
# Count unique values in x
ux <- unique(x)
t <- tabulate(match(x, ux))
} else {
# Weighted mode
# Count value of weight for each unique value in x
ux <- sort(unique(x))
t <- vapply(split(w, x), sum, numeric(1))
}
# Find values in x with largest count
keep <- t == max(t)
if (sum(keep) > 1L && isTRUE(unique)) {
x[0][NA]
} else {
ux[keep]
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.