Nothing
#' @name ensemble.cluster.multi
#' @title Multi-Method Ensemble Clustering with Graph-based Consensus
#'
#' @description Implements ensemble clustering by combining multiple clustering methods
#' (k-means, hierarchical, and spectral clustering) using a graph-based consensus approach.
#'
#' @param x data.frame or matrix where rows are observations and columns are features
#' @param k_km number of clusters for k-means clustering
#' @param k_hc number of clusters for hierarchical clustering
#' @param k_sc number of clusters for spectral clustering
#' @param n_ref number of reference distributions for stability assessment (default: 3)
#' @param B number of bootstrap samples for stability estimation (default: 100)
#' @param hc.method hierarchical clustering method (default: "ward.D")
#' @param dist_method distance method for spectral clustering (default: "euclidean")
#'
#' @return A list containing:
#' \describe{
#' \item{membership}{Final cluster assignments from ensemble consensus}
#' \item{k_consensus}{Number of clusters found in consensus}
#' \item{individual_results}{List of results from individual clustering methods}
#' \item{stability_measures}{Stability measures for each method}
#' \item{graph}{igraph object of the ensemble graph}
#' }
#'
#' @details
#' This function implements a multi-method ensemble clustering approach that:
#' 1. Applies multiple clustering methods (k-means, hierarchical, spectral)
#' 2. Assesses stability of each clustering through bootstrapping
#' 3. Constructs a weighted bipartite graph representing all clusterings
#' 4. Uses fast greedy community detection for final consensus
#'
#' @importFrom kernlab specc
#' @importFrom stats hclust cutree dist cor var runif prcomp kmeans
#' @importFrom flexclust dist2
#' @importFrom network set.vertex.attribute
#' @importFrom grid unit.c
#' @importFrom dplyr left_join
#'
#' @examples
#' \donttest{
#' data(iris)
#' df <- iris[,1:4]
#' result <- ensemble.cluster.multi(df, k_km=3, k_hc=3, k_sc=3)
#' plot(df[,1:2], col=result$membership, pch=16)
#' }
#'
#' @export
ensemble.cluster.multi <- function(x, k_km, k_hc, k_sc, n_ref=3, B=100, hc.method="ward.D", dist_method="euclidean") {
# Input validation
if (!is.matrix(x) && !is.data.frame(x))
stop("x must be a matrix or data.frame")
if (k_km < 1 || k_hc < 1 || k_sc < 1)
stop("Number of clusters must be positive")
result <- list()
# K-means clustering and stability
results_km <- esmbl.stability(x, k_km, scheme="kmeans", B=B)
Smin_ref_km_frame <- matrix(NA, nrow=1, ncol=n_ref)
for (i_ref in 1:n_ref) {
df_ref <- ref_dist(x)
results_km_ref <- esmbl.stability(df_ref, k_km, B=50, scheme="kmeans")
Smin_ref_km_frame[,i_ref] <- results_km_ref$Smin
}
Smin_ref_km_mean <- mean(Smin_ref_km_frame, na.rm=TRUE)
Smin_delta_km <- max(results_km$Smin - Smin_ref_km_mean, 0.01)
# Hierarchical clustering and stability
results_hc <- esmbl.stability(x, k_hc, scheme="hc", B=B, hc.method=hc.method, dist_method = dist_method)
Smin_ref_hc_frame <- matrix(NA, nrow=1, ncol=n_ref)
for (i_ref in 1:n_ref) {
df_ref <- ref_dist(x)
results_hc_ref <- esmbl.stability(df_ref, k_hc, B=50, scheme="hc")
Smin_ref_hc_frame[,i_ref] <- results_hc_ref$Smin
}
Smin_ref_hc_mean <- mean(Smin_ref_hc_frame, na.rm=TRUE)
Smin_delta_hc <- max(results_hc$Smin - Smin_ref_hc_mean, 0.01)
# Spectral clustering and stability
results_sc <- esmbl.stability(x, k_sc, scheme="spectral", B=B)
Smin_ref_sc_frame <- matrix(NA, nrow=1, ncol=n_ref)
for (i_ref in 1:n_ref) {
df_ref <- ref_dist(x)
results_sc_ref <- esmbl.stability(df_ref, k_sc, B=50, scheme="spectral")
Smin_ref_sc_frame[,i_ref] <- results_sc_ref$Smin
}
Smin_ref_sc_mean <- mean(Smin_ref_sc_frame, na.rm=TRUE)
Smin_delta_sc <- max(results_sc$Smin - Smin_ref_sc_mean, 0.01)
# Create incidence matrix for graph
inc <- matrix(0, nrow=sum(k_km, k_hc, k_sc), ncol=nrow(x))
# Add k-means edges
for (i in 1:k_km) {
clust_mem <- which(results_km$membership == i)
if (length(clust_mem) > 0) {
weights <- results_km$obs_wise[clust_mem] * Smin_delta_km
inc[i, clust_mem] <- weights
}
}
# Add hierarchical clustering edges
for (i in 1:k_hc) {
clust_mem <- which(results_hc$membership == i)
if (length(clust_mem) > 0) {
weights <- results_hc$obs_wise[clust_mem] * Smin_delta_hc
inc[i + k_km, clust_mem] <- weights
}
}
# Add spectral clustering edges
for (i in 1:k_sc) {
clust_mem <- which(results_sc$membership == i)
if (length(clust_mem) > 0) {
weights <- results_sc$obs_wise[clust_mem] * Smin_delta_sc
inc[i + k_km + k_hc, clust_mem] <- weights
}
}
# Remove any empty rows
inc <- inc[rowSums(inc) > 0, , drop=FALSE]
if (nrow(inc) == 0 || ncol(inc) == 0) {
stop("No valid clustering structure found")
}
# Create graph and find communities
graph <- igraph::graph_from_biadjacency_matrix(inc, weighted=TRUE)
communities <- igraph::cluster_fast_greedy(graph, weights=igraph::E(graph)$weight)
# Extract final clustering
n_vertices <- nrow(inc)
consensus_membership <- igraph::membership(communities)[(n_vertices + 1):(ncol(inc) + n_vertices)]
# Prepare return values
result$membership <- consensus_membership
result$k_consensus <- length(unique(consensus_membership))
result$individual_results <- list(
kmeans = results_km,
hierarchical = results_hc,
spectral = results_sc
)
result$stability_measures <- list(
kmeans_delta = Smin_delta_km,
hierarchical_delta = Smin_delta_hc,
spectral_delta = Smin_delta_sc
)
result$graph <- graph
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.