#' Estimating the Number of Clusters with Gabriel Cross-validation (adjusted)
#'
#' This function uses the results of the ClusterCVMain function and applies an
#' adjustment to account for high correlation between dimensions. The data
#' are rotated and rescaled so that the pooled within-cluster covariance of
#' the returned data is the identity matrix, and are then multiplied by a
#' random orthonormal matrix.
#'
#' @param input.data Numeric data matrix (or data frame with only numeric
#'   columns); one row per observation, one column per dimension.
#' @param k.star Optimized number of clusters. Used as the degrees-of-freedom
#'   correction (\code{nrow(input.data) - k.star}) of the pooled covariance.
#' @param cl.assign Cluster assignment for each observation; a vector with
#'   one label per row of \code{input.data}. Labels are matched by value, so
#'   they do not have to be the consecutive integers \code{1..k}.
#' @return Newly adjusted data matrix (same dimensions as \code{input.data})
#'   to be used in ClusterCV. The result depends on the random number
#'   generator state through \code{pracma::randortho}.
#'
#' @import dplyr pracma
#'
#' @export
ClusterCVAdjusted <- function(input.data, k.star, cl.assign){
  data.mat <- as.matrix(input.data)
  n.obs <- nrow(data.mat)
  n.dim <- ncol(data.mat)

  # Fail early on inputs that would otherwise produce silent garbage.
  if (!is.numeric(data.mat)) {
    stop("input.data must contain only numeric columns", call. = FALSE)
  }
  if (length(cl.assign) != n.obs) {
    stop("cl.assign must have one entry per row of input.data", call. = FALSE)
  }
  if (!is.numeric(k.star) || length(k.star) != 1L || n.obs <= k.star) {
    stop("k.star must be a single number smaller than nrow(input.data)",
         call. = FALSE)
  }

  # Update the mean value of the columns for each cluster.
  # Observations are mapped to their cluster by label value (not by using the
  # label itself as a row index), so arbitrary labels are handled correctly.
  group.index <- match(cl.assign, unique(cl.assign))
  group.sizes <- tabulate(group.index)
  # rowsum() returns one row per group, ordered 1..k by group.index; dividing
  # by group.sizes recycles down the rows, i.e. row g is divided by size g.
  groups.means <- rowsum(data.mat, group.index, reorder = TRUE) / group.sizes

  # Compute the error covariance matrix: the sum of outer products of the
  # within-cluster residuals, obtained in one step with crossprod().
  resid.mat <- data.mat - groups.means[group.index, , drop = FALSE]
  covar.c <- crossprod(resid.mat) / (n.obs - k.star)

  # Compute the rotated/rescaled data
  eig.c <- eigen(covar.c, symmetric = TRUE)
  # abs() guards against tiny negative eigenvalues caused by rounding.
  inv.sqrt.values <- abs(eig.c$values)^-.5
  # Scale the eigenvector columns directly instead of multiplying by
  # diag(values): diag() of a length-one vector builds an identity matrix of
  # that size rather than a 1 x 1 scaling matrix.
  whitening <- sweep(eig.c$vectors, 2, inv.sqrt.values, `*`)
  Q.mat <- pracma::randortho(n = n.dim, type = "orthonormal")
  input.data.new <- data.mat %*% whitening %*% Q.mat

  # Return new dataset
  return(input.data.new)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.