Nothing
#' Force a Pairwise Squared Distance Matrix to Euclidean Form
#'
#' Given a pairwise squared distance matrix \eqn{D} (where \eqn{D[i,j] = d(i,j)^2}),
#' this function ensures that \eqn{D} corresponds to a valid Euclidean squared
#' distance matrix. The correction is based on the weighted Gram matrix
#' \eqn{G_w = -\frac{1}{2} J_w D J_w^\top}, where \eqn{J_w = I_n - \mathbf{1} w^\top}
#' is the centering matrix defined by the weight vector \eqn{w}.
#'
#' If the smallest eigenvalue \eqn{\lambda_{\min}} of \eqn{G_w} is below the
#' negative tolerance \code{-tol}, the function corrects \eqn{D} by adding a
#' constant shift to guarantee positive semi-definiteness of the Gram matrix,
#' following the approach of \insertCite{lingoes1971some}{dbrobust} and
#' \insertCite{mardia1978some}{dbrobust}:
#' \deqn{
#' D_{\text{new}} = D + 2 c \mathbf{1} \mathbf{1}^\top - 2 c I_n,
#' }
#' where \eqn{c = |\lambda_{\min}|}.
#'
#' @param D Numeric square matrix (n x n) of pairwise squared distances.
#' Must be symmetric with zeros on the diagonal.
#' @param w Numeric vector of weights (length n). Internally normalized to sum to 1.
#' @param tol Numeric tolerance for detecting negative eigenvalues (default: 1e-10).
#'
#' @return A list with components:
#' \describe{
#' \item{D_euc}{Corrected pairwise squared Euclidean distance matrix (n x n).}
#' \item{eigvals_before}{Eigenvalues of the weighted Gram matrix before correction.}
#' \item{eigvals_after}{Eigenvalues of the weighted Gram matrix after correction.}
#' \item{transformed}{Logical, TRUE if correction was applied, FALSE otherwise.}
#' }
#'
#' @seealso
#' \code{\link[stats]{dist}}, \code{\link[base]{eigen}}, \code{\link{cmdscale}}
#'
#' @examples
#' # Load example dataset
#' data("Data_HC_contamination")
#'
#' # Reduce dataset to first 50 rows
#' Data_small <- Data_HC_contamination[1:50, ]
#'
#' # Select only continuous variables
#' cont_vars <- names(Data_small)[1:4]
#' Data_cont <- Data_small[, cont_vars]
#'
#' # Compute squared Euclidean distance matrix
#' dist_mat <- as.matrix(dist(Data_cont))^2
#'
#' # Introduce a small non-Euclidean distortion
#' dist_mat[1, 2] <- dist_mat[1, 2] * 0.5
#' dist_mat[2, 1] <- dist_mat[1, 2]
#'
#' # Uniform weights
#' weights <- rep(1, nrow(Data_cont))
#'
#' # Apply Euclidean correction
#' res <- make_euclidean(dist_mat, weights)
#'
#' # Check results (minimum eigenvalues before/after)
#' res$transformed
#' min(res$eigvals_before)
#' min(res$eigvals_after)
#'
#' # First 5x5 block of corrected matrix
#' round(res$D_euc[1:5, 1:5], 4)
#'
#' @references
#' \insertRef{lingoes1971some}{dbrobust}
#' \insertRef{mardia1978some}{dbrobust}
#'
#' @export
make_euclidean <- function(D, w, tol = 1e-10) {
if (!is.matrix(D) || nrow(D) != ncol(D)) {
stop("D must be a square matrix of pairwise squared distances.")
}
n <- nrow(D)
if (length(w) != n) {
stop("Length of weights vector must equal number of observations.")
}
if (!isSymmetric(D)) {
stop("D must be symmetric.")
}
if (!all(diag(D) == 0)) {
stop("Diagonal entries of D must be zero.")
}
w <- w / sum(w) # Normalize weights
# Weighted centering matrix
Jw <- diag(n) - rep(1, n) %*% t(w)
Gw <- -0.5 * Jw %*% D %*% t(Jw)
eigvals_before <- eigen(Gw, symmetric = TRUE, only.values = TRUE)$values
min_eig <- min(eigvals_before)
transformed <- FALSE
if (min_eig < -tol) {
c <- abs(min_eig)
Ones <- matrix(1, n, n)
D_euc <- D + 2 * c * Ones - 2 * c * diag(n)
transformed <- TRUE
} else {
D_euc <- D
}
# Compute eigenvalues after correction for info
Gw_after <- -0.5 * Jw %*% D_euc %*% t(Jw)
eigvals_after <- eigen(Gw_after, symmetric = TRUE, only.values = TRUE)$values
return(list(
D_euc = D_euc,
eigvals_before = eigvals_before,
eigvals_after = eigvals_after,
transformed = transformed
))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.