Nothing
#' Performs k-means clustering on interval data, allowing for partitioning of
#' data points into distinct clusters.
#'
#' @param x A 3D interval array representing the data to be clustered.
#' @param centers Either the number of clusters to create or a set of
#' pre-initialized cluster centers. If a number is provided, it specifies how
#' many clusters to create.
#' @param nstart The number of times to run the k-means algorithm with different
#' starting values in order to find the best solution (default is 10).
#' @param distance A string specifying the distance metric to use: 'euclid' for
#' Euclidean distance or 'hausdorff' for Hausdorff distance (default is
#' 'euclid').
#' @param trace Logical value indicating whether to show progress of the
#' algorithm (default is `FALSE`).
#' @param iter.max Maximum number of iterations allowed for the k-means
#' algorithm (default is 20).
#' @return A list of clustering results, including:
#' - `cluster`: A vector indicating the cluster assignment of each data point.
#' - `centers`: The final cluster centers.
#' - `totss`: Total sum of squares.
#' - `withinss`: Within-cluster sum of squares by cluster.
#' - `tot.withinss`: Total within-cluster sum of squares.
#' - `betweenss`: Between-cluster sum of squares.
#' - `size`: The number of points in each cluster.
#' - `iter`: Number of iterations the algorithm executed.
#' @useDynLib COveR, .registration = TRUE
#' @export
#' @examples
#' ikmeans(iaggregate(iris, col = 5), 2)
#' ikmeans(iaggregate(iris, col = 5), iaggregate(iris, col = 5))
ikmeans <- function( # nolint cyclocomp_linter
x, centers,
nstart = 10,
distance = "euclid",
trace = FALSE,
iter.max = 20 # nolint object_name_linter
) {
# Check input validity
stopifnot(
"Data must be interval" = is.interval(x),
"'nstart' must be > 0" = is.numeric(nstart) && nstart > 0,
"'trace' must be logical" = is.logical(trace),
"'iter.max' must be > 0" = is.numeric(iter.max) && iter.max > 0
)
# Set the distance measure
dist <- switch(
distance,
"euclid" = 0,
"hausdorff" = 1,
stop("Unknown distance type. Use 'euclid' or 'hausdorff'.")
)
# Handle centers input
if (is.numeric(centers)) {
if (centers > 0 && centers <= nrow(x$inter)) {
nc <- centers
c <- NULL
} else {
stop("The number of clusters must be between 1 and the number of rows.")
}
} else if (is.interval(centers) || is.matrix(centers) ||
is.vector(centers) || is.array(centers)) {
centers <- as.interval(centers)
if (dim(centers$inter)[3] != dim(x$inter)[3]) {
stop("'x' and 'centers' must have the same number of intervals.")
}
nc <- dim(centers$inter)[1]
c <- as.numeric(as.vector(centers$inter))
} else {
stop("'centers' must be a number, interval, vector, or matrix.")
}
# Call the underlying C function for k-means clustering
d <- dim(x$inter)
n <- dimnames(x$inter)
v <- as.numeric(as.vector(x$inter))
c <- .Call(
"_ikmeans", v, d[1], d[2], d[3],
nc, nstart, dist, trace, iter.max, c
)
# Naming
dimnames(c[[2]]) <- list(1:nc, n[[2]], n[[3]])
# Remove empty cluster
centers <- c[[2]][!rowSums(!is.finite(c[[2]])), , ]
# Ensure 3D array format if there is only one cluster
if (dim(centers)[1] == 1 && length(dim(centers)) < 3) {
centers <- array(as.vector(centers), dim = list(1, 2, d[3]))
}
cluster <- c[[1]]
centers <- as.interval(centers)
totss <- c[[3]]
wss <- c[[4]]
totwss <- c[[5]]
bss <- totss - totwss
size <- as.vector(table(cluster))
iter <- c[[6]]
# Return the clustering results as a structured list
structure(list(
cluster = cluster,
centers = centers,
totss = totss,
withinss = wss,
tot.withinss = totwss,
betweenss = bss,
size = size,
iter = iter
), class = "ikmeans")
}
#' Displays the results of ikmeans clustering in a readable format.
#'
#' @param x An `ikmeans` object resulting from the `ikmeans` function.
#' @param ... Additional arguments passed to print().
#' @return No return value, it prints the clustering results to the console.
#' @export
print.ikmeans <- function(x, ...) {
cat("Ikmeans clustering with", length(x$size), "clusters of sizes:",
paste(x$size, collapse = ", "), "\n")
cat("\nCluster centers:\n")
print(x$centers, ...)
cat("\nClustering vector:\n")
print(x$cluster, ...)
cat("\nWithin-cluster sum of squares by cluster:\n")
print(x$withinss, ...)
cat(sprintf(" (Between_SS / Total_SS = %5.1f%%)\n",
100 * x$betweenss / x$totss))
cat("Available components:\n")
print(names(x))
invisible(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.