Nothing
#' Non-hierarchical clustering: CLARA
#'
#' This function performs non-hierarchical clustering based on
#' dissimilarity using partitioning around medoids, implemented via
#' the Clustering Large Applications (CLARA) algorithm.
#'
#' @param dissimilarity The output object from [dissimilarity()] or
#' [similarity_to_dissimilarity()], or a `dist` object. If a `data.frame` is
#' used, the first two columns should represent pairs of sites (or any pair of
#' nodes), and the subsequent column(s) should contain the dissimilarity
#' indices.
#'
#' @param index The name or number of the dissimilarity column to use. By
#' default, the third column name of `dissimilarity` is used.
#'
#' @param seed A value for the random number generator (set to `NULL` for random
#' initialization by default).
#'
#' @param n_clust An `integer` vector or a single `integer` specifying the
#' desired number(s) of clusters.
#'
#' @param maxiter An `integer` defining the maximum number of iterations.
#'
#' @param initializer A `character` string, either `"BUILD"` (used in the
#' classic PAM algorithm) or `"LAB"` (Linear Approximate BUILD).
#'
#' @param fasttol A positive `numeric` value defining the tolerance for fast
#' swapping behavior. Defaults to 1.
#'
#' @param numsamples A positive `integer` specifying the number of samples to
#' draw.
#'
#' @param sampling A positive `numeric` value defining the sampling rate.
#'
#' @param independent A `boolean` indicating whether the previous
#' medoids are excluded in the next sample. Defaults to `FALSE`.
#'
#' @param algorithm_in_output A `boolean` indicating whether the original output
#' of [fastclara][fastkmedoids::fastclara] should be included in the output.
#' Defaults to `TRUE` (see Value).
#'
#' @return
#' A `list` of class `bioregion.clusters` with five components:
#' \enumerate{
#' \item{**name**: A `character` string containing the name of the algorithm.}
#' \item{**args**: A `list` of input arguments as provided by the user.}
#' \item{**inputs**: A `list` of characteristics of the clustering process.}
#' \item{**algorithm**: A `list` of all objects associated with the
#' clustering procedure, such as original cluster objects (only if
#' `algorithm_in_output = TRUE`).}
#' \item{**clusters**: A `data.frame` containing the clustering results.}}
#'
#' If `algorithm_in_output = TRUE`, the `algorithm` slot includes the output of
#' [fastclara][fastkmedoids::fastclara].
#'
#' @details
#' Based on [fastkmedoids](https://cran.r-project.org/package=fastkmedoids)
#' package ([fastclara][fastkmedoids::fastclara]).
#'
#' @references
#' Schubert E & Rousseeuw PJ (2019) Faster k-Medoids Clustering: Improving the
#' PAM, CLARA, and CLARANS Algorithms. \emph{Similarity Search and Applications}
#' 11807, 171-187.
#'
#' @seealso
#' For more details illustrated with a practical example,
#' see the vignette:
#' \url{https://biorgeo.github.io/bioregion/articles/a4_2_non_hierarchical_clustering.html}.
#'
#' Associated functions:
#' [nhclu_clarans] [nhclu_dbscan] [nhclu_kmeans] [nhclu_pam] [nhclu_affprop]
#'
#' @author
#' Pierre Denelle (\email{pierre.denelle@gmail.com}) \cr
#' Boris Leroy (\email{leroy.boris@gmail.com}) \cr
#' Maxime Lenormand (\email{maxime.lenormand@inrae.fr})
#'
#'
#' @examples
#' comat <- matrix(sample(0:1000, size = 500, replace = TRUE, prob = 1/1:1001),
#' 20, 25)
#' rownames(comat) <- paste0("Site",1:20)
#' colnames(comat) <- paste0("Species",1:25)
#'
#' dissim <- dissimilarity(comat, metric = "all")
#'
#' #clust <- nhclu_clara(dissim, index = "Simpson", n_clust = 5)
#'
#' @importFrom stats as.dist
#' @importFrom fastkmedoids fastclara
#'
#' @export
nhclu_clara <- function(dissimilarity,
index = names(dissimilarity)[3],
seed = NULL,
n_clust = c(1,2,3),
maxiter = 0,
initializer = "LAB",
fasttol = 1,
numsamples = 5,
sampling = 0.25,
independent = FALSE,
algorithm_in_output = TRUE){
# 1. Controls ----------------------------------------------------------------
controls(args = NULL, data = dissimilarity, type = "input_nhandhclu")
if(!inherits(dissimilarity, "dist")){
controls(args = NULL, data = dissimilarity, type = "input_dissimilarity")
controls(args = NULL, data = dissimilarity,
type = "input_data_frame_nhandhclu")
controls(args = index, data = dissimilarity, type = "input_net_index")
net <- dissimilarity
# Convert tibble into dataframe
if(inherits(net, "tbl_df")){
net <- as.data.frame(net)
}
net[, 3] <- net[, index]
net <- net[, 1:3]
controls(args = NULL, data = net, type = "input_net_index_value")
dist.obj <- stats::as.dist(
net_to_mat(net,
weight = TRUE, squared = TRUE, symmetrical = TRUE))
} else {
controls(args = NULL, data = dissimilarity, type = "input_dist")
dist.obj <- dissimilarity
if(is.null(labels(dist.obj))){
attr(dist.obj, "Labels") <- paste0(1:attr(dist.obj, "Size"))
message("No labels detected, they have been assigned automatically.")
}
}
if(!is.null(seed)){
controls(args = seed, data = NULL, type = "strict_positive_integer")
}
controls(args = n_clust, data = NULL,
type = "strict_positive_integer_vector")
controls(args = maxiter, data = NULL, type = "positive_integer")
controls(args = initializer, data = NULL, type = "character")
if(!(initializer %in% c("BUILD", "LAB"))){
stop(paste0("Please choose initializer from the following:\n",
"BUILD or LAB."),
call. = FALSE)
}
controls(args = fasttol, data = NULL, type = "positive_numeric")
controls(args = numsamples, data = NULL, type = "positive_integer")
controls(args = sampling, data = NULL, type = "positive_numeric")
controls(args = independent, data = NULL, type = "boolean")
controls(args = algorithm_in_output, data = NULL, type = "boolean")
# 2. Function ---------------------------------------------------------------
# Output format
outputs <- list(name = "nhclu_clara")
outputs$args <- list(index = index,
seed = seed,
n_clust = n_clust,
maxiter = maxiter,
initializer = initializer,
fasttol = fasttol,
numsamples = numsamples,
sampling = sampling,
independent = independent,
algorithm_in_output = algorithm_in_output)
outputs$inputs <- list(bipartite = FALSE,
weight = TRUE,
pairwise = TRUE,
pairwise_metric = ifelse(!inherits(dissimilarity,
"dist"),
ifelse(is.numeric(index),
names(net)[3], index),
NA),
dissimilarity = TRUE,
nb_sites = attr(dist.obj, "Size"),
hierarchical = FALSE)
outputs$algorithm <- list()
outputs$clusters <- data.frame(matrix(ncol = 1,
nrow = length(labels(dist.obj)),
dimnames = list(labels(dist.obj),
"name")))
outputs$clusters$name <- labels(dist.obj)
# CLARA algorithm
if(!is.null(seed)){
outputs$algorithm <-
lapply(n_clust,
function(x)
fastkmedoids::fastclara(rdist = dist.obj,
n = nrow(dist.obj),
k = x,
maxiter = maxiter,
initializer = initializer,
fasttol = fasttol,
numsamples = numsamples,
sampling = sampling,
independent = independent,
seed = seed))
}else{
outputs$algorithm <-
lapply(n_clust,
function(x)
fastkmedoids::fastclara(rdist = dist.obj,
n = nrow(dist.obj),
k = x,
maxiter = maxiter,
initializer = initializer,
fasttol = fasttol,
numsamples = numsamples,
sampling = sampling,
independent = independent,
seed = seedrng()))
}
names(outputs$algorithm) <- paste0("K_", n_clust)
outputs$clusters <- data.frame(
outputs$clusters,
data.frame(lapply(names(outputs$algorithm),
function(x) outputs$algorithm[[x]]@assignment)))
outputs$clusters <- knbclu(outputs$clusters, reorder = TRUE)
outputs$cluster_info <- data.frame(
partition_name = names(outputs$clusters)[2:length(outputs$clusters),
drop = FALSE],
n_clust = apply(outputs$clusters[, 2:length(outputs$clusters),
drop = FALSE],
2, function(x) length(unique(x))))
# Set algorithm in output
if (!algorithm_in_output) {
outputs$algorithm <- NA
}
class(outputs) <- append("bioregion.clusters", class(outputs))
return(outputs)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.