R/ClusterCV.R

Defines functions ClusterCV

Documented in ClusterCV

#' Estimating the Number of Clusters with Gabriel Cross-validation
#'
#' This function determines the number of clusters using Gabriel
#' cross-validation, with the option of using an adjustment for when
#' dimensional correlation is high.
#'
#' Columns whose absolute values sum to zero (i.e. columns without any data)
#' are removed before the estimation is run.
#'
#' @param input.data data matrix
#' @param kclust.min minimum number of clusters
#' @param kclust.max maximum number of clusters
#' @param nfold.r number of folds row-wise
#' @param nfold.c number of folds column-wise
#' @param adjusted logical; if \code{TRUE}, the data are rescaled with
#'   \code{ClusterCVAdjusted} using the first-pass result and the
#'   cross-validation is run a second time on the rescaled data. Default is
#'   \code{FALSE}.
#' @param alg clustering algorithm used, k-means or spectral, with k-means
#'   (\code{"k.means"}) as the default. The value is passed through to
#'   \code{ClusterCVMain} unchanged.
#' @return Returns the list produced by \code{ClusterCVMain}: the final
#'   estimation of the number of clusters that minimizes the cross-validation
#'   error (\code{kstar}) and the cluster assignments for each observation
#'   (\code{clust.assign}).
#'
#' @import fields dplyr stats kernlab pracma
#'
#' @export
ClusterCV <- function(input.data, kclust.min, kclust.max, nfold.r, nfold.c,
                      adjusted = FALSE, alg = "k.means") {

  # Remove columns without any data. `drop = FALSE` keeps the result
  # two-dimensional even when only a single column survives the filter;
  # without it a one-column result would collapse to a plain vector.
  has.data <- colSums(abs(input.data)) > 0
  input.data <- input.data[, has.data, drop = FALSE]

  # First pass: get Kstar and the cluster assignments
  opt.c <- ClusterCVMain(input.data, kclust.min, kclust.max,
                         nfold.r, nfold.c, alg)

  # If the adjustment is requested, rescale the data using the first-pass
  # result and then rerun ClusterCVMain on the rescaled data
  if (adjusted) {
    new.data <- ClusterCVAdjusted(input.data, opt.c$kstar, opt.c$clust.assign)
    opt.c <- ClusterCVMain(data.frame(new.data), kclust.min, kclust.max,
                           nfold.r, nfold.c, alg)
  }

  # Return output
  opt.c
}
pangoria/clusterEstimation documentation built on Dec. 22, 2021, 6:39 a.m.