R/prepare_data.R
In clusterWebApp: Universal Clustering Analysis Platform

Documented in prepare_data

# Declare the built-in dataset names that prepare_data() references as bare
# symbols, so that R CMD check does not flag them as undefined global variables.
utils::globalVariables(c("iris", "USArrests", "mtcars", "CO2", "swiss"))
#' Prepare Built-in Datasets for Clustering
#'
#' Loads and preprocesses a built-in dataset for clustering analysis.
#' Depending on the dataset name provided, different cleaning steps are applied.
#'
#' @param dataset A single character string naming the dataset. Options are:
#'   "iris", "USArrests", "mtcars", "CO2", "swiss", "Moons". Matching is exact
#'   and case-sensitive.
#'
#' @return A cleaned \code{data.frame} containing only numeric variables and
#'   no missing values. An error is raised if \code{dataset} is not a single
#'   character string or is not one of the supported names.
#'
#' @details
#' \describe{
#'   \item{iris}{The classic iris dataset, excluding the \code{Species} column.}
#'   \item{USArrests}{State-wise arrest data. Missing values are removed.}
#'   \item{mtcars}{Motor trend car data set. No transformation applied.}
#'   \item{CO2}{CO2 uptake in grass plants. Only numeric columns are selected
#'     and rows with missing values are removed.}
#'   \item{swiss}{Swiss fertility and socio-economic indicators. Used as-is.}
#'   \item{Moons}{Synthetic non-linear dataset generated by
#'     \code{mlbench::mlbench.smiley()} (200 points, \code{sd1 = 0.05}); only
#'     the \code{x} component of the result is kept. The points are generated
#'     at call time, so call \code{set.seed()} beforehand if a reproducible
#'     sample is needed.}
#' }
#'
#' @examples
#' data <- prepare_data("iris")
#' head(data)
#'
#' @importFrom stats na.omit
#' @importFrom tidyr drop_na
#' @importFrom mlbench mlbench.smiley
#' @import datasets
#' @export
prepare_data <- function(dataset) {
  # switch() dispatches by *position* when given a number, so a numeric
  # `dataset` would silently pick a branch; insist on a single string.
  if (!is.character(dataset) || length(dataset) != 1L || is.na(dataset)) {
    stop("`dataset` must be a single, non-missing character string.",
         call. = FALSE)
  }

  switch(
    dataset,
    # Drop the factor column by name rather than by magic index.
    "iris" = iris[, names(iris) != "Species", drop = FALSE],
    "USArrests" = na.omit(USArrests),
    "mtcars" = mtcars,
    "CO2" = {
      # vapply() guarantees a logical mask regardless of input shape.
      numeric_cols <- vapply(CO2, is.numeric, logical(1))
      tidyr::drop_na(CO2[, numeric_cols, drop = FALSE])
    },
    "swiss" = swiss,
    "Moons" = as.data.frame(mlbench::mlbench.smiley(200, sd1 = 0.05)$x),
    # Default branch: fail loudly instead of returning an invisible NULL.
    stop(
      "Unknown dataset \"", dataset, "\". Valid options are: ",
      "\"iris\", \"USArrests\", \"mtcars\", \"CO2\", \"swiss\", \"Moons\".",
      call. = FALSE
    )
  )
}