R/compute_stat_mode.R

Defines functions compute_stat_mode

Documented in compute_stat_mode

# ---------------------------------------------------------------------------- #
#' Compute statistical mode of sample data
#'
#' Computes the statistical mode of the supplied vector under some simplifying
#' assumptions (see 'Details' for more information).
#'
#' @param x A vector (integer, numeric, logical, character or factor) for which
#'   the mode should be computed.
#'
#' @details
#' If x is numeric and assumes integer values then the mode is computed by
#' tabulating its frequencies: the maximum value (or, if there are ties, maximum
#' values) would then be the mode (or modes). If all values are unique then
#' there is no mode and \code{compute_stat_mode} returns \code{NA}.\cr
#'
#' If x is numeric and assumes real values, it is assumed to be a sample from a
#' continuous random variable. In this case the mode is estimated by computing
#' the kernel density function of x and returning the (single) value which
#' corresponds to the maximum of the estimated kernel density. Note that in this
#' case only one modal value - i.e. the first - will be returned.
#'
#' @returns The statistical mode computed from the supplied vector.
#'
#' @examples
#' compute_stat_mode(c(1, 1, 3, 5, 1, 3, 1, 2)) # => 1
#' compute_stat_mode(c(1, 3, 5, 2)) # => NA
#' compute_stat_mode(c(4, 4, 4, 4, 4, 4)) # => NA
#' compute_stat_mode(c(63, 62, 66, 67, 63, 70, 67, 68, 61)) # => 63, 67
#'
#' compute_stat_mode(c("a", "b", "c", "c")) # => "c"
#'
#' set.seed(10)
#' compute_stat_mode(rnorm(100)) # => -0.1795327
#'
#' set.seed(100)
#' mean(vapply(1:5, function(x) compute_stat_mode(rnorm(100)),
#'   FUN.VALUE = numeric(1)
#' )) # => -0.0069
#'
#' @export
#'
compute_stat_mode <- function(x) {

  # see
  # https://stats.stackexchange.com/questions/176112/
  #   how-to-find-the-mode-of-a-probability-density-function
  # [viewed 09jan20]

  valid_types <- c(
    "character", "double", "factor", "integer", "logical", "numeric"
  )

  if (!test_atomic_vector(x)) {
    cli_abort(c(
      "{.var x} must be an atomic vector",
      "x" = "You've supplied a {.cls {class(x)}}."
    ),
    class = "jute_error"
    )
  }

  if (!test_choice(typeof(x), valid_types)) {
    cli_abort(c(
      "{.var x} must be a numeric or character vector",
      "x" = "You've supplied a {.cls {typeof(x)}} vector.",
      "i" = "Valid types: {style_valid_types(valid_types)}."
    ),
    class = "jute_error"
    )
  }

  is_all <- function(x, type) {
    type <- match.arg(type, valid_types)
    switch(type,
      factor    = !isTRUE(any(!is.factor(x))),
      character = !isTRUE(any(!is.character(x))),
      integer   = !isTRUE(any(!is_int_val(x)))
    )
  }

  if (is_all(x, "factor") || is_all(x, "character") || is_all(x, "integer")) {
    uniq_x <- unique(x)
    if (length(uniq_x) == length(x)) {
      # In this case all values are unique
      return(NA)
    } else if (length(unique(table(x))) == 1) {
      # In this case all values are the same or all values appear with the same
      # frequency
      return(NA)
    }
    freq <- tabulate(match(x, uniq_x))
    mode_val <- uniq_x[which(freq == max(freq))]
    if (is.factor(mode_val)) {
      levels(x)[mode_val]
    } else {
      mode_val
    }
  } else {
    kern_den <- stats::density(x = x, bw = stats::bw.SJ(x))
    kern_den$x[which(kern_den$y == max(kern_den$y)[1])]
  }
}

# ---------------------------------------------------------------------------- #
toniprice/jute documentation built on Jan. 11, 2023, 8:23 a.m.