# Functions wrapping the variable clustering used in ClustOfVar
#' Explore the clustering structure in a set of variables
#'
#' This function wraps the clustering functionality from ClustOfVar into a more
#' user-friendly format. Use this function to perform the exploratory
#' heirarchical clustering, with the aim of deciding the number of final clusters
#' to use
#'
#' @param dat data.frame/tbl containing the variables to cluster. Be sure to only
#' include input variables; no internal filtering is performed
#' @return an object of class \code{mtClust} containing elements \code{hclust},
#' the object showing the heirarchical clustering, and \code{stab},
#' showing the graph used to select the number of clusters. You can call
#' \code{plot} (which in turn calls \code{plot.hclustvar} or
#' \code{plot.clustab}) on either output to see the plots, which is useful
#' for deciding how many clusters to keep
#' @details
#' The function internally tries to avoid copying of the input data as much as
#' possible, however the implementation of \code{ClustOfVar} makes an extraordinary
#' number of copies by default. This method may be prohibitively slow/memory
#' intensive on large datasets. The output of this function is designed to be used
#' as input to \code{variable_cluster}, which actually maps variables to clusters
#' @family clustering
#' @seealso \code{\link{variable_cluster}}
#' @export
#' @import magrittr
#' @examples
#' exploratory_cluster(iris)
#'
exploratory_cluster <- function(dat) {
### Split dataset into categorical + numeric variables
is_num <- sapply(dat,is.numeric) # Which columns are numeric?
ds <- list(
quanti = data.frame(dat[,is_num]),
quali = data.frame(dat[,!is_num])
)
colnames(ds$quanti) <- colnames(dat)[is_num]
colnames(ds$quali) <- colnames(dat)[!is_num]
### Apply hclustvar
# Note: by default this returns the original dataframe; avoid this behaviour
if (ncol(ds$quali) == 0) {
hclust_init <- ClustOfVar::hclustvar(X.quanti = ds$quanti)[c("call",
"rec",
"merge",
"height",
"order",
"labels",
"clusmat")]
} else if (ncol(ds$quanti) == 0) {
hclust_init <- ClustOfVar::hclustvar(X.quali = ds$quali)[c("call",
"rec",
"merge",
"height",
"order",
"labels",
"clusmat")]
} else {
hclust_init <- ClustOfVar::hclustvar(X.quanti = ds$quanti,
X.quali = ds$quali)[c("call",
"rec",
"merge",
"height",
"order",
"labels",
"clusmat")]
}
# Reinstate the class of the original hclust object
# This will facilitate plotting
class(hclust_init) <- c("hclustvar","hclust")
# However, this will not allow for calculating stability
# Function to re-form the object with the correct class:
reform_hclust <- function(obj) {
obj[["X.quanti"]] <- obj$rec$X.quanti
obj[["X.quali"]] <- obj$rec$X.quali
class(obj) <- c("hclustvar","hclust")
return(obj)
}
### Cluster stability
hclust_stability <- ClustOfVar::stability(reform_hclust(hclust_init),
B = 25)
### Return a list with the hclust and stability objects
clust_out <- list(
hclust = hclust_init,
stab = hclust_stability
)
class(clust_out) <- "mtClust"
return(clust_out)
}
#' Map variables to clusters determined using heirarchical clustering
#'
#' This function takes the output of \code{exploratory_clusters} and maps
#' the variables in the dataset to a user-specified number of clusters.
#' @param clust An object of class "mtClust" obtained as output from the
#' \code{exploratory_cluster} function
#' @param G Number of clusters, from \code{1} to \code{p} where \code{p} is
#' the number of variables in the dataset
#' @return a tbl with two columns: \code{variable}, listing the variable,
#' names, and \code{cluster}, giving a numeric indication of the
#' clusters. Variables with the same value of \code{cluster}
#' belong to the same cluster
#' @details As with other functions in the \code{modellingTools} package,
#' this function mainly serves to wrap the functionality of the
#' variable clustering provided by \code{ClustOfVars}, providing
#' a consistent input/output interface. This function must be
#' combined with the \code{exploratory_cluster} function to work;
#' selecting the number of clusters is a subjective process that
#' should not be automated
#' @seealso \code{\link{exploratory_cluster}}
#' @export
#' @import magrittr
#' @examples
#' clust <- exploratory_cluster(iris)
#' variable_cluster(clust,2)
variable_cluster <- function(clust,G) UseMethod("varclust")
varclust.mtClust <- function(clust,G) {
clusters <- ClustOfVar::cutreevar(clust$hclust,
k = G)$cluster
return(dplyr::data_frame(
"variable" = names(clusters),
"cluster" = clusters
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.