Nothing
#' Compute Robust Generalized Gower Distance
#'
#' Computes a weighted, robust version of the Gower distance for mixed-type data
#' (continuous, binary, categorical). Continuous variables are handled via a
#' robust Mahalanobis distance using a supplied robust covariance matrix.
#' Binary and categorical variables are transformed into distances via similarity coefficients.
#'
#' @param data Numeric matrix or data frame with all variables combined.
#' @param w Numeric vector of weights for each observation. Will be normalized internally.
#' @param p Integer vector of length 3: \code{c(#cont, #binary, #categorical)}.
#' @param robust_cov Robust covariance matrix for continuous variables.
#'
#' @details
#' The function computes distances separately for continuous, binary, and categorical variables,
#' then scales each by its geometric variability and combines them. The output is a normalized
#' squared distance matrix suitable for robust clustering or aggregation procedures.
#'
#' Continuous distances are Mahalanobis distances: \eqn{(x-y)^T (S)^-1 (x-y)}.
#' Categorical distances use a matching coefficient.
#' Binary distances are modified to account for positive/negative matches.
#'
#' @return A numeric matrix of squared robust Gower distances, normalized by geometric variability.
#' @keywords internal
robust_ggower<-function(data,w,p,robust_cov){ # data: data matrix, w: weights, p: 3-vector, robust_cov: robust covariance matrix
pc<-p[1] # number of continuous variables
pb<-p[2] # number of binary variables
pq<-p[3] # number of categorical variables
n<-dim(data)[1] # number of individuals
w<-w/sum(w) # weights
# ------------------------------------------------------------------
# Quantitative variables
# ------------------------------------------------------------------
numvar<-function(x,y,robust_cov){ # x,y: row vectors
inv_robust_cov<-solve(robust_cov) # inverse of robust covariance matrix
md<-t(x-y)%*%inv_robust_cov%*%(x-y) # Mahalanobis distance
}
# ------------------------------------------------------------------
# Categorical variables
# ------------------------------------------------------------------
categvar<-function(x,y){ # x,y: row vectors
nmatch<-0 # number of matches
nmatch<-sum(x==y) # number of matches
s<-nmatch/length(x) # similarity coefficient
d<-2*(1-s) # distance
}
# ------------------------------------------------------------------
# Binary variables
# ------------------------------------------------------------------
binaryvar<-function(x,y){ # x,y: row vectors
nposmatch<-0 # number of positive matches
nnegmatch<-0 # number of negative matches
nposmatch<-sum(x*y) # number of positive matches
nnegmatch<-sum((x-y)==0)-sum(x*y) # number of negative matches
if (nnegmatch==length(x)){ # if all binary variables are different
s<-0 # similarity coefficient
}
else{ # if there are some binary variables that are the same
s<-nposmatch/(length(x)-nnegmatch) # similarity coefficient
}
d<-2*(1-s) # distance
}
# ------------------------------------------------------------------
if (pc>0){ # if there are continuous variables
D_num<-proxy::dist(x=data[,1:pc],y=NULL,method=numvar,robust_cov) # distance matrix
D_num <- dbstats::as.D2(D_num) # D2 matrix
gv_num <- 1/2*t(w)%*%D_num%*%w # geometric variability
gv_num <- as.numeric(gv_num) # numeric
}
else{ # if there are no continuous variables
D_num<-0 # distance matrix
gv_num<-1 # geometric variability
}
if (pb>0){ # if there are binary variables
D_bin<-proxy::dist(x=data[,(pc+1):(pc+pb)],y=NULL,method=binaryvar) # distance matrix
D_bin<-dbstats::as.D2(D_bin) # D2 matrix
gv_bin<-1/2*t(w)%*%D_bin%*%w # geometric variability
gv_bin<-as.numeric(gv_bin) # numeric
}
else{ # if there are no binary variables
D_bin<-0 # distance matrix
gv_bin<-1 # geometric variability
}
if (pq>0){ # if there are categorical variables
D_cat<-proxy::dist(x=data[,(pc+pb+1):(pc+pb+pq)],y=NULL,method=categvar) # distance matrix
D_cat<-dbstats::as.D2(D_cat) # D2 matrix
gv_cat<-1/2*t(w)%*%D_cat%*%w # geometric variability
gv_cat<-as.numeric(gv_cat) # numeric
}
else{ # if there are no categorical variables
D_cat<-0 # distance matrix
gv_cat<-1 # geometric variability
}
# ------------------------------------------------------------------
D<-(D_num/gv_num)+(D_bin/gv_bin)+(D_cat/gv_cat) # Gower distance
gv_D<-1/2*t(w)%*%D%*%w # geometric variability
gv_D<-as.numeric(gv_D ) # numeric
robust_gower_dist<-D/gv_D # robust Gower distance
return(robust_gower_dist)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.