# R/ClusterCVAdjusted.R

# Defines functions: ClusterCVAdjusted

# Documented in: ClusterCVAdjusted

#' Estimating Number of Clusters with Gabriel Cross-validation (adjusted)
#'
#' This function uses the results of the ClusterCVMain function and applies an
#' adjustment to account for high correlation between dimensions. The data are
#' rotated onto the eigenvectors of the pooled within-cluster covariance
#' matrix, rescaled by the inverse square root of the absolute eigenvalues,
#' and finally multiplied by a random orthonormal matrix.
#'
#' Because the last step draws a fresh matrix from \code{pracma::randortho},
#' repeated calls on the same input are not expected to return identical
#' values.
#'
#' @param input.data - data matrix (or data frame coercible to a numeric
#'   matrix) with one observation per row and one dimension per column
#' @param k.star - optimized number of clusters; used as the degrees-of-freedom
#'   correction, i.e. the covariance is divided by \code{nrow(input.data) -
#'   k.star}
#' @param cl.assign - cluster assignment for each observation, one entry per
#'   row of \code{input.data}. Labels only need to identify the clusters; they
#'   do not have to be the integers \code{1..k.star}.
#' @return Newly adjusted data matrix to be used in ClusterCV, with the same
#'   dimensions as \code{input.data}
#'
#' @import dplyr pracma
#'
#' @export
ClusterCVAdjusted <- function(input.data, k.star, cl.assign){
  data.mat <- as.matrix(input.data)
  n.obs <- nrow(data.mat)
  n.dim <- ncol(data.mat)

  if (!is.numeric(data.mat)) {
    stop("input.data must contain only numeric columns", call. = FALSE)
  }
  if (length(cl.assign) != n.obs) {
    stop("cl.assign must have one entry per row of input.data", call. = FALSE)
  }

  # Map arbitrary cluster labels onto contiguous row indices 1..g so that the
  # lookup into the table of means cannot pick the wrong (or a missing) row.
  cl.index <- as.integer(factor(cl.assign))

  # Update the mean value of the columns for each cluster. rowsum() returns
  # one row per group in sorted order; the division recycles the group sizes
  # down each column, i.e. row g is divided by the size of group g.
  groups.means <- rowsum(data.mat, cl.index, reorder = TRUE) /
    tabulate(cl.index)

  # Compute the error covariance matrix: the sum of the outer products of the
  # within-cluster residuals is the cross-product of the residual matrix.
  resid.mat <- data.mat - groups.means[cl.index, , drop = FALSE]
  covar.c <- crossprod(resid.mat) / (n.obs - k.star)

  # Compute the rotated/rescaled data
  eig.c <- eigen(covar.c, symmetric = TRUE)
  Q.mat <- pracma::randortho(n = n.dim, type = "orthonormal")
  # abs() guards against slightly negative eigenvalues; an eigenvalue of
  # exactly zero still yields Inf here. `nrow` is given explicitly because
  # diag() on a length-one vector would otherwise build an identity matrix.
  inv.sqrt <- diag(abs(eig.c$values)^-0.5, nrow = n.dim)

  # Return new dataset
  data.mat %*% eig.c$vectors %*% inv.sqrt %*% Q.mat
}
# pangoria/clusterEstimation documentation built on Dec. 22, 2021, 6:39 a.m.