Nothing
#' Compute pairwise distances for continuous numeric data
#'
#' Internal helper function to compute pairwise distance matrices for purely numeric datasets.
#' Supports standard metrics, including Euclidean, Manhattan, Chebyshev, Canberra, Minkowski,
#' standardized Euclidean, and Mahalanobis distances.
#'
#' Supported methods and formulas (for observations \eqn{\mathbf{z}_i} and \eqn{\mathbf{z}_j}):
#' \itemize{
#' \item \code{"euclidean"}: \deqn{\delta_E(i,j) = \sqrt{\sum_{k=1}^{p} (z_{ik} - z_{jk})^2}}
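#' \item \code{"minkowski"}: \deqn{\delta_q(i,j) = \left( \sum_{k=1}^{p} |z_{ik} - z_{jk}|^q \right)^{1/q}} the exponent \eqn{q} is supplied through the function argument \code{p} (in the formula, \eqn{p} denotes the number of variables)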
#' \item \code{"manhattan"}: \deqn{\delta_1(i,j) = \sum_{k=1}^{p} |z_{ik} - z_{jk}|}
#' \item \code{"maximum"}: \deqn{\delta_\infty(i,j) = \max_k |z_{ik} - z_{jk}|}
#' \item \code{"canberra"}: \deqn{\delta_C(i,j) = \sum_{k=1}^{p} \frac{|z_{ik} - z_{jk}|}{|z_{ik}| + |z_{jk}|}} convention: \eqn{0/0 := 0}
#' \item \code{"euclidean_standardized"}: \deqn{\delta_K(i,j) = \sqrt{\sum_{k=1}^{p} \frac{(z_{ik} - z_{jk})^2}{s_k^2}}} \eqn{s_k^2} is the variance of variable k
#' \item \code{"mahalanobis"}: \deqn{\delta_M(i,j) = \sqrt{ (\mathbf{z}_i - \mathbf{z}_j)' \mathbf{S}^{-1} (\mathbf{z}_i - \mathbf{z}_j) }} \eqn{\mathbf{S}} is the covariance matrix
#' }
#'
#' @param x A numeric data frame or matrix with rows as observations and columns as variables.
#' @param method Distance metric to compute (see details for supported options).
#' @param p Numeric, the power parameter for Minkowski distance (required if \code{method = "minkowski"}).
#'
#' @return A symmetric numeric matrix of pairwise distances between rows of \code{x}.
#' The diagonal contains zeros.
#'
#' @details
#' Considerations when choosing a distance metric:
#' \itemize{
#' \item For \code{"euclidean_standardized"}, columns are standardized to mean 0 and variance 1 before
#' computing Euclidean distances.
#' \item Cosine and correlation distances rely on the \pkg{proxy} package; these are not guaranteed to be
#' strictly Euclidean.
#' \item Minkowski distance requires specifying the parameter \code{p} (e.g., \code{p = 3} for L3 norm).
#' \item Mahalanobis distance uses the inverse of the covariance matrix. If the covariance matrix is
#' singular, the generalized inverse from \code{MASS::ginv()} (package \pkg{MASS}) is used.
#' \item Standard metrics (Euclidean, Manhattan, Maximum, Canberra) are computed using \code{stats::dist}.
#' }
#' @examples
#' # Small numeric matrix
#' mat <- matrix(c(1, 2, 3,
#' 4, 5, 6,
#' 7, 8, 9), nrow = 3, byrow = TRUE)
#'
#' # Euclidean distance
#' dbrobust:::dist_continuous(mat, method = "euclidean")
#'
#' # Standardized Euclidean
#' dbrobust:::dist_continuous(mat, method = "euclidean_standardized")
#'
#' # Minkowski distance with p = 3
#' dbrobust:::dist_continuous(mat, method = "minkowski", p = 3)
#'
#' # Mahalanobis distance
#' set.seed(123)
#' mat <- matrix(rnorm(5*3), nrow = 5, ncol = 3)
#' colnames(mat) <- c("X1","X2","X3")
#' # Compute the mahalanobis distance
#' dbrobust:::dist_continuous(mat, method = "mahalanobis")
#'
#' # Cosine distance (requires 'proxy' package)
#' dbrobust:::dist_continuous(mat, method = "cosine")
#'
#' @keywords internal
dist_continuous <- function(x, method, p = NULL) {
  # Coerce data frames to a matrix so every branch works on one container type
  if (is.data.frame(x)) x <- as.matrix(x)

  # as.matrix() yields a non-numeric matrix when e.g. character or factor
  # columns are present, so this one check covers matrix and data-frame input
  if (!is.numeric(x)) {
    stop("Continuous methods require all columns to be numeric")
  }

  # `method` is used in scalar `if` conditions below; reject NULL, NA and
  # vectors up front instead of failing later with a cryptic condition error
  if (!is.character(method) || length(method) != 1L || is.na(method)) {
    stop("'method' must be a single character string")
  }

  # Euclidean distance on columns scaled to mean 0 / sd 1.
  # Note: scale() divides by the column sd, so a zero-variance column
  # produces NaN distances.
  if (method == "euclidean_standardized") {
    x_scaled <- scale(x)
    return(as.matrix(stats::dist(x_scaled, method = "euclidean")))
  }

  # Cosine / correlation distances are delegated to the optional 'proxy' package
  if (method %in% c("cosine", "correlation")) {
    if (!requireNamespace("proxy", quietly = TRUE)) {
      stop("Package 'proxy' is required for method: ", method)
    }
    warning(
      "The chosen method '", method,
      "' does not guarantee Euclidean distances. Proceeding with calculation.",
      call. = FALSE
    )
    return(as.matrix(proxy::dist(x, method = method)))
  }

  # Minkowski distance needs the exponent `p`; stats::dist validates its value
  if (method == "minkowski") {
    if (is.null(p)) stop("You must specify parameter 'p' for Minkowski distance")
    return(as.matrix(stats::dist(x, method = "minkowski", p = p)))
  }

  # Mahalanobis distance: pairwise quadratic form with the inverse covariance
  if (method == "mahalanobis") {
    # A plain vector has no columns (ncol() is NULL), so test for a matrix first
    if (!is.matrix(x) || ncol(x) < 2) {
      stop("Mahalanobis distance requires at least two variables")
    }

    n <- nrow(x)
    # With fewer than two observations there are no pairs (and no covariance
    # to estimate); return the trivial zero matrix, as the stats::dist-based
    # branches do, instead of indexing past the last row
    if (n < 2L) {
      return(matrix(0, n, n))
    }

    covmat <- stats::cov(x, use = "pairwise.complete.obs")
    inv_covmat <- tryCatch(
      solve(covmat),
      error = function(e) {
        # Fall back to the Moore-Penrose inverse when solve() fails
        if (!requireNamespace("MASS", quietly = TRUE)) {
          stop(
            "Covariance matrix singular and package 'MASS' is required ",
            "for the generalized inverse",
            call. = FALSE
          )
        }
        warning(
          "Covariance matrix singular. Using generalized inverse (MASS::ginv).",
          call. = FALSE
        )
        MASS::ginv(covmat)
      }
    )

    # Fill the upper triangle and mirror it; the diagonal stays at its
    # initial value of zero
    d <- matrix(0, n, n)
    for (i in seq_len(n - 1L)) {
      for (j in seq(i + 1L, n)) {
        delta <- as.numeric(x[i, ] - x[j, ])
        d[i, j] <- d[j, i] <- sqrt(drop(delta %*% inv_covmat %*% delta))
      }
    }
    return(d)
  }

  # Fallback: base R 'dist' handles the standard metrics
  # (euclidean, manhattan, maximum, canberra, ...)
  as.matrix(stats::dist(x, method = method))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.