R/utilities.R

Defines functions generalized_mean weighted_mode

Documented in generalized_mean weighted_mode

#' Generalized mean
#' @export
#' @description Create a function to calculate a generalized mean.
#' @param p Exponent of the generalized mean.
#' @return A function.
#' @details Returns a function to calculate the generalized mean with exponent \code{p} (i.e., the weighted mean of \code{x} to the power of \code{p}, all raised to the power of 1/\code{p}).
#' The return value is a function of \code{\link[stats]{weighted.mean}}, unless \code{p} = -Inf or \code{p} = Inf, in which case it is a function of min/max.
#' @seealso See \code{\link[ppd]{geomean}}.

generalized_mean <- function(p) {
  # p should be a length 1 number
  stopifnot(length(p) == 1L, is.numeric(p), !is.na(p))
  p <- as.numeric(p)
  # assign appropriate generalized mean function based on p
  genmean <- if (identical(p, 0)) { # geomean if p = 0
    function(x, w, na.rm) exp(stats::weighted.mean(log(x), w, na.rm = na.rm))
  } else if (identical(p, -Inf)) { # min if p = -inf
    function(x, w, na.rm) min(x, na.rm = na.rm)
  } else if (identical(p, Inf)) { # max if p = inf
    function(x, w, na.rm) max(x, na.rm = na.rm)
  } else if (identical(abs(p), 1)) {
    if (identical(p, 1)) { # arithmetic mean if p = 1
      function(x, w, na.rm) stats::weighted.mean(x, w, na.rm = na.rm)
    } else { # harmonic mean if p = -1
      function(x, w, na.rm) 1 / stats::weighted.mean(1 / x, w, na.rm = na.rm)
    }
  } else if (p < 0) { # generalized mean otherwise
    function(x, w, na.rm) (stats::weighted.mean((1 / x^abs(p)), w, na.rm = na.rm))^(1 / p)
  } else {
    function(x, w, na.rm) (stats::weighted.mean(x^p, w, na.rm = na.rm))^(1 / p)
  }
  # return value
  function(x, w, na.rm = FALSE) {
    stopifnot(
      is.numeric(x) || is.logical(x) || is.complex(x), 
      missing(w) || is.numeric(w), 
      missing(w) || length(x) == length(w),
      length(na.rm) == 1L, 
      is.logical(na.rm)
    )
    if (length(x) == 0L || (!missing(w) && all(w == 0))) return(NaN)
    genmean(x, w, na.rm)
  }
}

#' Weighted geometric/harmonic mean
#' @export
#' @description Calculate a weighted geometric or harmonic mean.
#' @param x A numeric vector.
#' @param w An optional vector of (numeric) weights, the same length as \code{x}.
#' @param na.rm Should missing values be removed?
#' @return A numeric value.
#' @details The underlying calculation is done with \code{\link[stats]{weighted.mean}}. 
#' This means that weights are handled in the same way as \code{\link[stats]{weighted.mean}}.
#' 
#' The implementation and performance is similar to the \code{geometric.mean} and \code{harmonic.mean} functions in the \code{psych} package, and the \code{geoMean} function in the \code{EnvStats} package, with the exception that weights are allowed.
#' @seealso See \code{\link[ppd]{generalized_mean}} for making other types of generalized means.
#' @examples
#' # Geometric mean
#' geomean(2:3)
#' 
#' # Same as manual calculation
#' sqrt(6)
#' 
#' # Using prod to calculate geomean manually can give misleading results
#' prod(1:1000)^(1/1000)
#' geomean(1:1000)
#' 
#' # Geomean is usually faster anyways
#' x <- runif(1000000)
#' system.time(geomean(x))
#' system.time(prod(x^(1/length(x))))
#' 
#' # Harmonic mean
#' harmean(2:3)
#' 
#' # Same as manual calculation
#' (sum((2:3)^(-1)) / 2)^(-1)
geomean <- generalized_mean(0)

#' @rdname geomean
#' @export
harmean <- generalized_mean(-1)



#' Weighted mode
#' @description Calculate the weighted mode of a vector.
#' @author Steve Martin
#' @param x An atomic vector.
#' @param w An optional vector of weights, the same length as \code{x}. Calculates an unweighted mode if no weights are given.
#' @param unique Should multiple modes result in NA?
#' @param na.rm Should missing values in \code{x} be removed?
#' @return A vector of the same type as \code{x}.
#' @details Returns the values in \code{x} with the largest weight. Leaving \code{w} blank, or giving a vector of equal weights, returns the most frequently occurring values in \code{x}.
#' This is equivalent to choosing a value m that minimizes the weighted mean of the hamming distance between the elements of \code{x} and m.
#'
#' The mode need not be unique, and NA is returned if \code{unique} is TRUE and there are multiple modes.
#' @examples
#' \dontrun{
#' # Unique mode
#' weighted_mode(c(1, 3, 2, 1))
#' weighted_mode(c(TRUE, FALSE, TRUE, FALSE), 1:4)
#' # Two modes
#' weighted_mode(c('a', 'b', 'c', 'b', 'c'))
#' }

weighted_mode <- function(x, w, unique = TRUE, na.rm = FALSE) {
  # Check user input
  if (!is.atomic(x)) stop('x must be an atomic vector.')
  if (!missing(w) && length(x) != length(w)) stop('x and w must be the same length.')
  if (length(x) == 0L) return(x[0][NA])
  unique <- isTRUE(unique)
  # Deal with NAs
  if (anyNA(x)) {
    if (isTRUE(na.rm)) {
      na <- !is.na(x)
      x <- x[na]
      if (!missing(w)) {
        w <- w[na]
      }
    } else {
      return(x[0][NA])
    }
  }
  # Unweighted mode
  if (missing(w)) {
  # Count unique values in x
    ux <- unique(x)
    t <- tabulate(match(x, ux))
  } else {
  # Weighted mode
  # Count value of weight for each unique value in x
    ux <- sort(unique(x))
    t <- vapply(split(w, x), sum, numeric(1))
  }
  # Find values in x with largest count
  keep <- t == max(t)
  if (sum(keep) > 1L && isTRUE(unique)) {
    x[0][NA]
  } else {
    ux[keep]
  }
}
marberts/ppd documentation built on March 27, 2020, 7:21 p.m.