R/distances.R

Defines functions add_maha_d maha_d

Documented in add_maha_d maha_d

#' Mahalanobis distance
#'
#' A wrapper for `stats::mahalanobis()`.  Each column of `x` is centered with
#' `.f`, the covariance is taken from `stats::cov(x)`, and upper-tail
#' chi-squared p-values (with `ncol(x)` degrees of freedom) are attached to the
#' result.
#'
#' @param x A data.frame or matrix of numeric values
#' @param .f The centering function, applied to each column of `x`; it must
#'   return a single number.  Defaults to use `mean`.
#' @param inverted Logical, passed on to `stats::mahalanobis()`.  If TRUE, the
#'   covariance matrix (p x p) of the distribution is supposed to contain the
#'   inverse of the covariance matrix.  Note that the matrix handed over is
#'   always `stats::cov(x)`, which is not inverted.
#'
#' @return `maha_d()` returns the numeric vector produced by
#'   `stats::mahalanobis()` (one value per row of `x`), carrying a
#'   `"p-values"` attribute of the same length.
#'
#' @export
#'
#' @examples
#'
#' maha_d(iris[1:3])
#' maha_d(as.matrix(iris[1:3]))

maha_d <- function(x, .f = mean, inverted = FALSE) {
  # Coerce first so a matrix is centered column by column; iterating over a
  # bare matrix would visit every cell instead of every column.
  x <- as.data.frame(x)
  # vapply() guarantees exactly one numeric center per column.
  center <- vapply(x, .f, numeric(1))
  # NOTE(review): with inverted = TRUE the plain covariance is treated as if it
  # were already inverted -- confirm this is the intended behaviour.
  res <- stats::mahalanobis(x, center, stats::cov(x), inverted = inverted)
  attr(res, "p-values") <- stats::pchisq(res, ncol(x), lower.tail = FALSE)
  res
}

#' Add mahalanobis distance to a data.frame
#'
#' Computes the mahalanobis distance with `maha_d()` and appends it, together
#'   with the outlier p-value, as two new columns after the columns used for
#'   the computation.  When `cols` is supplied, only those columns of `df`
#'   (followed by the two new columns) are returned.
#'
#' @param df The data.frame to add the distance to
#' @param cols A character vector of the columns to select from the data.frame.
#'   If `NULL` (the default), all columns of `df` are used.
#'
#' @return `add_maha_d()` returns a data.frame holding the selected columns
#'   followed by `md` (the distance) and `p-values` (its p-value).
#'
#' @rdname maha_d
#' @export
#'
#' @examples
#' add_maha_d(iris[1:3])
#' add_maha_d(mtcars, cols = c("mpg", "hp", "wt"))

add_maha_d <- function(df, cols = NULL, .f = mean, inverted = FALSE) {
  stopifnot(is.data.frame(df))
  x <- if (is.null(cols)) df else df[cols]
  md <- maha_d(x, .f = .f, inverted = inverted)
  # Extract only the p-value attribute: attributes(md) also contains `names`
  # whenever the rows are named, which used to leak in as an extra column.
  p_value <- attr(md, "p-values", exact = TRUE)
  # as.vector() drops names/attributes so the new columns are plain numerics;
  # the column names match what the function has always produced.
  cbind(x, md = as.vector(md), `p-values` = as.vector(p_value))
}
jmbarbone/qpm documentation built on July 25, 2020, 10:41 p.m.