R/cluster_performance.R

Defines functions cluster_performance.parameters_clusters cluster_performance.dbscan cluster_performance.hclust cluster_performance.kmeans cluster_performance

Documented in cluster_performance cluster_performance.dbscan cluster_performance.hclust cluster_performance.kmeans cluster_performance.parameters_clusters

#' Performance of clustering models
#'
#' Compute performance indices for clustering solutions.
#'
#' @inheritParams model_parameters.kmeans
#'
#' @examples
#' # kmeans
#' model <- kmeans(iris[1:4], 3)
#' cluster_performance(model)
#' @export
cluster_performance <- function(model, ...) {
  UseMethod("cluster_performance")
}


#' @rdname cluster_performance
#' @export
cluster_performance.kmeans <- function(model, ...) {
  out <- as.data.frame(model[c("totss", "betweenss", "tot.withinss")])
  colnames(out) <- c("Sum_Squares_Total", "Sum_Squares_Between", "Sum_Squares_Within")

  out$R2 <- out$Sum_Squares_Between / out$Sum_Squares_Total

  row.names(out) <- NULL
  class(out) <- c("performance_model", class(out))

  out
}





#' @rdname cluster_performance
#' @examples
#' # hclust
#' data <- iris[1:4]
#' model <- hclust(dist(data))
#' clusters <- cutree(model, 3)
#'
#' rez <- cluster_performance(model, data, clusters)
#' rez
#' @export
cluster_performance.hclust <- function(model, data, clusters, ...) {
  if (is.null(data)) {
    insight::format_error(
      "This function requires the data used to compute the clustering to be provided via `data` as it is not accessible from the clustering object itself."
    )
  }
  if (is.null(clusters)) {
    insight::format_error(
      "This function requires a vector of clusters assignments of same length as data to be passed, as it is not contained in the clustering object itself."
    )
  }

  params <- model_parameters(model, data = data, clusters = clusters, ...)

  cluster_performance(params)
}


#' @rdname cluster_performance
#' @examplesIf require("dbscan", quietly = TRUE)
#' # DBSCAN
#' model <- dbscan::dbscan(iris[1:4], eps = 1.45, minPts = 10)
#'
#' rez <- cluster_performance(model, iris[1:4])
#' rez
#' @export
cluster_performance.dbscan <- function(model, data, ...) {
  if (is.null(data)) {
    insight::format_error(
      "This function requires the data used to compute the clustering to be provided via `data` as it is not accessible from the clustering object itself."
    )
  }

  params <- model_parameters(model, data = data, ...)

  cluster_performance(params)
}


# Base --------------------------------------------------------------------



#' @rdname cluster_performance
#' @examples
#' # Retrieve performance from parameters
#' params <- model_parameters(kmeans(iris[1:4], 3))
#' cluster_performance(params)
#' @export
cluster_performance.parameters_clusters <- function(model, ...) {
  valid <- model$Cluster != 0 & model$Cluster != "0" # Valid clusters
  out <- data.frame(
    Sum_Squares_Total = attributes(model)$Sum_Squares_Total,
    Sum_Squares_Between = attributes(model)$Sum_Squares_Between,
    Sum_Squares_Within = sum(model$Sum_Squares[valid], na.rm = TRUE)
  )

  out$R2 <- out$Sum_Squares_Between / out$Sum_Squares_Total

  class(out) <- c("performance_model", class(out))

  out
}
easystats/parameters documentation built on Nov. 10, 2024, 3:33 p.m.