R/data.R
In kmed: Distance-Based k-Medoids

#' 4-clustered data set
#'
#' A dataset containing two variables of 300 objects and their class
#' memberships, generated by the \pkg{clusterGeneration} package.
#'
#' @format A data frame with 300 rows and 3 variables:
#' \describe{
#'   \item{x1}{First generated variable.}
#'   \item{x2}{Second generated variable.}
#'   \item{class}{Class membership.}
#' }
#' @source The data are generated via the \code{genRandomClust} function in
#' the \pkg{clusterGeneration} package. The code to generate this data set
#' is:
#'
#' \preformatted{
#' set.seed(2016)
#' randclust <- clusterGeneration::genRandomClust(4, sepVal = 0.001,
#'   numNonNoisy = 2, numReplicate = 1, clustszind = 3,
#'   clustSizes = as.numeric(table(sample(1:4, 300, replace = TRUE))),
#'   outputDatFlag = FALSE, outputLogFlag = FALSE, outputEmpirical = FALSE,
#'   outputInfo = FALSE)
#' clust4 <- as.data.frame(randclust$datList$test_1)
#' clust4$class <- randclust$memList$test_1
#' }
#'
#' @references Qiu, W., and H. Joe. 2015. clusterGeneration: Random Cluster
#' Generation (with Specified Degree of Separation).
#' @references Qiu, W., and H. Joe. 2006a. Generation of Random Clusters with
#' Specified Degree of Separation. Journal of Classification 23, pp. 315-334.
#' @references Qiu, W., and H. Joe. 2006b. Separation Index and Partial
#' Membership for Clustering. Computational Statistics and Data Analysis 50,
#' pp. 585-603.
"clust4"

#' 5-clustered data set
#'
#' A dataset containing two variables of 800 objects and their class
#' memberships, generated by the \pkg{clusterGeneration} package.
#'
#' @format A data frame with 800 rows and 3 variables:
#' \describe{
#'   \item{x1}{First generated variable.}
#'   \item{x2}{Second generated variable.}
#'   \item{class}{Class membership.}
#' }
#' @source The data are generated via the \code{genRandomClust} function in
#' the \pkg{clusterGeneration} package. The code to generate this data set
#' is:
#'
#' \preformatted{
#' set.seed(2016)
#' randclust <- clusterGeneration::genRandomClust(5, sepVal = 0.2,
#'   numNonNoisy = 2, numReplicate = 1, clustszind = 3,
#'   clustSizes = as.numeric(table(sample(1:5, 800, replace = TRUE))),
#'   outputDatFlag = FALSE, outputLogFlag = FALSE, outputEmpirical = FALSE,
#'   outputInfo = FALSE)
#' clust5 <- as.data.frame(randclust$datList$test_1)
#' clust5$class <- randclust$memList$test_1
#' }
#'
#' @references Qiu, W., and H. Joe. 2015. clusterGeneration: Random Cluster
#' Generation (with Specified Degree of Separation).
#' @references Qiu, W., and H. Joe. 2006a. Generation of Random Clusters with
#' Specified Degree of Separation. Journal of Classification 23, pp. 315-334.
#' @references Qiu, W., and H. Joe. 2006b. Separation Index and Partial
#' Membership for Clustering. Computational Statistics and Data Analysis 50,
#' pp. 585-603.
"clust5"

#' Global food security index
#'
#' A dataset containing four variables of 113 countries for their food
#' security index, based on panelists' evaluation in 2017.
#'
#' @format A data frame with 113 rows and 4 variables:
#' \describe{
#'   \item{affordability}{Index of food affordability.}
#'   \item{availability}{Index of food availability.}
#'   \item{safety}{Index of food quality and safety.}
#'   \item{resilience}{Index of natural resources and resilience.}
#' }
#' @source The original data consist of 27 indicator variables, which are
#' summarized into four pillars of food security: affordability,
#' availability, quality and safety, and natural resources and resilience.
#' Expert panelists on food security score each country from 0 to 100,
#' where 0 is the least favorable towards food security.
#'
#' \url{https://impact.economist.com/sustainability/project/food-security-index/}
"globalfood"

#' Heart Disease data set
#'
#' A mixed variable dataset containing 14 variables of 297 patients for
#' their heart disease diagnosis.
#'
#' @format A data frame with 297 rows and 14 variables:
#' \describe{
#'   \item{age}{Age in years (numerical).}
#'   \item{sex}{Sex: 1 = male, 0 = female (logical).}
#'   \item{cp}{Four chest pain types: (1) typical angina, (2) atypical angina,
#'   (3) non-anginal pain, (4) asymptomatic (categorical).}
#'   \item{trestbps}{Resting blood pressure (in mm Hg on admission to
#'   the hospital) (numerical).}
#'   \item{chol}{Serum cholesterol in mg/dl (numerical).}
#'   \item{fbs}{Fasting blood sugar more than 120 mg/dl (logical).}
#'   \item{restecg}{Resting electrocardiographic results: (0) normal,
#'   (1) having ST-T wave abnormality, (2) showing probable or definite
#'   left ventricular hypertrophy by Estes' criteria (categorical).}
#'   \item{thalach}{Maximum heart rate achieved (numerical).}
#'   \item{exang}{Exercise induced angina (logical).}
#'   \item{oldpeak}{ST depression induced by exercise relative to
#'   rest (numerical).}
#'   \item{slope}{The slope of the peak exercise ST segment: (1) upsloping,
#'   (2) flat, (3) downsloping (categorical).}
#'   \item{ca}{Number of major vessels (0-3) colored by fluoroscopy
#'   (numerical).}
#'   \item{thal}{(3) normal, (6) fixed defect, (7) reversible defect
#'   (categorical).}
#'   \item{class}{Diagnosis of heart disease (4 classes). It can be reduced
#'   to 2 classes by setting 0 for 0 values and 1 for non-0 values.}
#' }
#' @source The data set is taken from the UCI machine learning repository.
#' The original data set consists of 303 patients with 6 NA's. The missing
#' values are omitted, which reduces the data set to 297 patients.
#'
#' \url{https://archive.ics.uci.edu/ml/datasets/Heart+Disease}
#'
#' @references Lichman, M. (2013). UCI machine learning repository.
"heart"