#' Robust Covariance Estimation Based on Geometric Variability
#'
#' Computes a robust covariance matrix for a weighted dataset by selecting
#' the most central subset of observations according to geometric variability.
#' Observations are ranked based on a proximity function measuring how far each
#' individual is from the rest of the data. The most central subset is then used
#' to compute a covariance matrix.
#'
#' @param X Numeric matrix of dimension n x p, where n is the number of observations
#' and p is the number of variables.
#' @param w Numeric vector of weights of length n. Weights will be normalized to sum to 1.
#' @param alpha Numeric trimming proportion between 0 and 1 (e.g., 0.05, 0.10, 0.15)
#' indicating the fraction of most extreme observations to discard.
#'
#' @return A list containing:
#' \describe{
#' \item{S}{Robust covariance matrix of dimension p x p.}
#' \item{central_idx}{Indices of observations selected as the central subset.}
#' \item{outlier_idx}{Indices of observations considered outliers.}
#' \item{phi}{Proximity function values for all observations.}
#' \item{q}{Threshold value used for trimming (quantile of phi).}
#' }
#'
#' @examples
#' # Load a small subset of the example dataset
#' data("Data_HC_contamination", package = "dbrobust")
#' Data_small <- Data_HC_contamination[1:20, ]
#'
#' # Select only continuous variables
#' cont_vars <- names(Data_small)[1:4]
#' Data_cont <- Data_small[, cont_vars]
#'
#' # Set uniform weights and trimming proportion
#' weights <- rep(1, nrow(Data_cont))
#' alpha <- 0.10
#'
#' # Compute robust covariance with trimming
#' res <- dbrobust:::robust_covariance_gv(Data_cont, weights, alpha)
#'
#' # Inspect results: central observations, outliers, covariance, threshold, proximity
#' res$central_idx
#' res$outlier_idx
#' round(res$S, 4)
#' res$q
#' round(res$phi[1:10], 4)
#'
#' @importFrom stats quantile
#' @keywords internal
robust_covariance_gv <- function(X, w, alpha) {
  # Coerce inputs first so n is taken from the matrix form, then validate.
  X <- as.matrix(X)
  w <- as.numeric(w)
  n <- nrow(X)
  stopifnot(
    is.numeric(X),
    length(w) == n,
    all(w >= 0), sum(w) > 0,
    is.numeric(alpha), length(alpha) == 1L,
    alpha >= 0, alpha < 1
  )

  # Normalize weights to sum to 1
  w <- w / sum(w)

  # Weighted centering operator: Xw = Dw^{1/2} (I - 1 w') X.
  # Row-scaling by sqrt(w) is equivalent to diag(sqrt(w)) %*% Jw but avoids
  # materializing an n x n diagonal matrix.
  Jw <- diag(n) - rep(1, n) %*% t(w)
  Xw <- (sqrt(w) * Jw) %*% X

  # Squared pairwise Mahalanobis distances of the weighted-centered data
  D2 <- StatMatch::mahalanobis.dist(Xw)^2

  # Geometric variability VG = (1/2) w' D2 w (drop() keeps it a scalar, not 1x1)
  VG <- drop(0.5 * t(w) %*% D2 %*% w)

  # Proximity function phi_i = sum_j w_j * d^2(i, j) - VG, vectorized
  # (replaces the per-observation loop; identical values).
  phi <- drop(D2 %*% w) - VG

  # Keep the most central (1 - alpha) fraction; ties at the threshold are kept.
  q <- stats::quantile(phi, probs = 1 - alpha, type = 5)
  central_idx <- which(phi <= q)
  outlier_idx <- setdiff(seq_len(n), central_idx)

  # Recompute the weighted covariance on the trimmed, re-normalized subset
  Xtrim <- X[central_idx, , drop = FALSE]
  w2 <- w[central_idx]
  w2 <- w2 / sum(w2)
  m <- length(central_idx)
  Jw2 <- diag(m) - rep(1, m) %*% t(w2)
  Xc <- (sqrt(w2) * Jw2) %*% Xtrim
  S <- crossprod(Xc)  # t(Xc) %*% Xc

  list(
    S = S,
    central_idx = central_idx,
    outlier_idx = outlier_idx,
    phi = phi,
    q = q
  )
}